242 lines
6.5 KiB
Python
242 lines
6.5 KiB
Python
import sys
|
|
|
|
import llvm
|
|
# Refuse to load this module on LLVM older than 3.4: the mc wrappers below
# are not supported there, so fail at import time instead of at first use.
if llvm.version < (3, 4):
    raise Exception("mc is not supported for llvm version less than 3.4")
|
|
|
|
from io import BytesIO
|
|
import contextlib
|
|
|
|
from llvmpy import api, extra
|
|
from llvmpy.api.llvm import MCDisassembler
|
|
|
|
class Operand(object):
    '''
    wrapper around a single MCOperand of a decoded instruction.

    the target machine is kept alongside the operand so register numbers
    can be translated to names and nested instructions can be wrapped.
    '''

    def __init__(self, mcoperand, target_machine):
        '''
        @mcoperand: an MCOperand object
        @target_machine: an llvm.target.TargetMachine object

        raises llvm.LLVMException if @mcoperand is false-y (null).
        '''

        self.op = mcoperand
        if not self.op:
            raise llvm.LLVMException("null MCOperand argument")

        self.tm = target_machine

    def __str__(self):
        '''
        short description of the operand, e.g. "reg(NAME)" or "imm(0x10)".
        operands of no known kind are rendered as "invalid".
        '''
        s = "invalid"
        if self.is_reg():
            s = "reg(%s)" % (self.reg_name())
        elif self.is_imm():
            s = "imm(0x%02x)" % (self.op.getImm())
        elif self.is_fp_imm():
            s = "imm(%r)" % (self.op.getFPImm())
        elif self.is_expr():
            s = "expr(%r)" % (self.op.getExpr().getKind())
        elif self.is_inst():
            # Instr needs the target machine as well as the MCInst;
            # omitting it raised a TypeError for every nested instruction.
            s = repr(Instr(self.op.getInst(), self.tm))

        return s

    def __repr__(self):
        return str(self)

    def reg_name(self):
        '''
        name of the register this operand refers to.

        returns "?" when the target reports a blank name for the register
        and "" when the operand is not a register at all.
        '''
        if self.is_reg():
            s = self.tm.reg_info.getName(self.op.getReg())
            if s.strip() == "":
                return "?"
            else:
                return s
        else:
            return ""

    def is_reg(self):
        ''' result of isReg() on the wrapped operand '''
        return self.op.isReg()

    def is_imm(self):
        ''' result of isImm() on the wrapped operand '''
        return self.op.isImm()

    def is_fp_imm(self):
        ''' result of isFPImm() on the wrapped operand '''
        return self.op.isFPImm()

    def is_expr(self):
        ''' result of isExpr() on the wrapped operand '''
        return self.op.isExpr()

    def is_inst(self):
        ''' result of isInst() on the wrapped operand '''
        return self.op.isInst()

    def get_imm(self):
        ''' the immediate value, or None if this is not an immediate '''
        if self.is_imm():
            return self.op.getImm()
        else:
            return None

    def get_fp_imm(self):
        ''' the floating point immediate, or None if this is not one '''
        if self.is_fp_imm():
            return self.op.getFPImm()
        else:
            return None

    def get_inst(self):
        '''
        the nested instruction wrapped in an Instr that shares this
        operand's target machine, or None if this is not an instruction.
        '''
        if self.is_inst():
            # pass the target machine through; Instr requires it.
            return Instr(self.op.getInst(), self.tm)
        else:
            return None
|
|
|
|
class Instr(object):
    '''
    a decoded MCInst paired with the target machine that is used to print
    it and to look up its instruction descriptor.
    '''

    def __init__(self, mcinst, target_machine):
        '''
        @mcinst: an MCInst object
        @target_machine: an llvm.target.TargetMachine object

        raises llvm.LLVMException if @mcinst is false-y (null).
        '''
        if not mcinst:
            raise llvm.LLVMException("null MCInst argument")

        self.mcinst = mcinst
        self.tm = target_machine

    def __str__(self):
        ''' the instruction as rendered by the target's instruction printer '''
        stream = extra.make_raw_ostream_for_printing()
        printer = self.tm.inst_printer
        printer.printInst(self.mcinst, stream, "")
        return str(stream.str())

    def __repr__(self):
        return str(self)

    def __len__(self):
        ''' the number of operands '''
        return int(self.mcinst.size())

    def operands(self):
        ''' list of Operand wrappers, one per operand, in operand order '''
        inst = self.mcinst
        # an operand count of zero (or less) yields an empty range, hence [].
        return [Operand(inst.getOperand(n), self.tm)
                for n in range(0, inst.getNumOperands())]

    @property
    def opcode(self):
        ''' opcode reported by the wrapped MCInst '''
        return self.mcinst.getOpcode()

    @property
    def instr_desc(self):
        ''' descriptor for this opcode, fetched from the target's instr_info '''
        return self.tm.instr_info.get(self.opcode)

    @property
    def flags(self):
        ''' getFlags() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.getFlags()

    @property
    def ts_flags(self):
        ''' TSFlags attribute of the instruction descriptor '''
        desc = self.instr_desc
        return desc.TSFlags

    def is_branch(self):
        ''' isBranch() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isBranch()

    def is_cond_branch(self):
        ''' isConditionalBranch() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isConditionalBranch()

    def is_uncond_branch(self):
        ''' isUnconditionalBranch() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isUnconditionalBranch()

    def is_indirect_branch(self):
        ''' isIndirectBranch() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isIndirectBranch()

    def is_call(self):
        ''' isCall() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isCall()

    def is_return(self):
        ''' isReturn() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isReturn()

    def is_terminator(self):
        ''' isTerminator() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isTerminator()

    def is_barrier(self):
        ''' isBarrier() of the instruction descriptor '''
        desc = self.instr_desc
        return desc.isBarrier()
|
|
|
|
class BadInstr(Instr):
    '''
    an Instr with no extra behaviour; the distinct type lets callers tell
    these instructions apart from ordinary Instr objects.
    '''
|
|
|
|
class Disassembler(object):
    '''
    decodes raw bytes into Instr objects using the disassembler that
    belongs to a target machine.
    '''

    def __init__(self, target_machine):
        '''
        @target_machine: the target machine whose disassembler and asm info
        are used for decoding
        '''
        self.tm = target_machine

    @property
    def mdasm(self):
        ''' the target machine's disassembler '''
        return self.tm.disassembler

    @property
    def mai(self):
        ''' the target machine's asm info '''
        return self.tm.asm_info

    def instr(self, mcinst):
        ''' wrap @mcinst in an Instr bound to this target machine '''
        return Instr(mcinst, self.tm)

    def bad_instr(self, mcinst):
        ''' wrap @mcinst in a BadInstr bound to this target machine '''
        return BadInstr(mcinst, self.tm)

    def decode(self, bs, base_addr, align=None):
        '''
        generator that decodes the bytes in @bs one instruction at a time.

        @bs: the code to decode, as 'bytes' (a 'str' is also accepted; on
             python 3 it is converted character by character with ord()).
        @base_addr: the address the first byte of @bs was taken from (it is
             not an offset into @bs).
        @align: byte alignment used only when a position cannot be decoded:
             the bytes up to the next multiple of @align (counted as an
             offset into @bs) are skipped. anything that is not an int >= 1
             selects the architecture's minimum instruction alignment,
             which may not be ideal for disassembly. for example, the
             default alignment for ARM is 1, but you probably want 4 when
             disassembling ARM instructions.

        yields (addr, data, inst) tuples: addr is an integer, data is a
        tuple of integers holding the bytes consumed, and inst is an
        llvm.mc.Instr, a BadInstr for a soft decode failure, or None when
        decoding failed outright.

        raises TypeError if @bs is neither 'str' nor 'bytes'.
        '''
        py3 = sys.version_info.major >= 3

        if py3 and isinstance(bs, str):
            bs = bytes(map(ord, bs))
        elif not isinstance(bs, bytes):
            raise TypeError("expected bs to be either 'str' or 'bytes' but got %s" % type(bs))

        code = api.llvm.StringRefMemoryObject.new(bs, base_addr)
        offset = 0
        if not isinstance(align, int) or align < 1:
            align = self.mai.getMinInstAlignment()

        decode_status = MCDisassembler.DecodeStatus

        while offset < code.getExtent():
            inst = api.llvm.MCInst.new()
            addr = code.getBase() + offset
            status, size = self.mdasm.getInstruction(inst, code, addr)

            if size < 1:
                # nothing was consumed: step to the next aligned offset.
                size = align - (offset % align)

            remaining = code.getExtent() - offset
            if remaining < 1:
                break

            # never read past the end of the buffer.
            raw = code.readBytes(addr, min(size, remaining))

            if py3:
                data = tuple(raw)
            else:
                # python 2 hands back a str; turn it into integers.
                data = tuple(ord(ch) for ch in raw)

            if status == decode_status.Fail:
                decoded = None
            elif status == decode_status.SoftFail:
                decoded = self.bad_instr(inst)
            else:
                decoded = self.instr(inst)

            yield (addr, data, decoded)

            offset += size
|