535 lines
19 KiB
Python
535 lines
19 KiB
Python
"""Disassembler of Python byte code into mnemonics."""
|
|
|
|
import sys
|
|
import types
|
|
import collections
|
|
import io
|
|
|
|
from opcode import *
|
|
from opcode import __all__ as _opcodes_all
|
|
|
|
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
|
|
"findlinestarts", "findlabels", "show_code",
|
|
"get_instructions", "Instruction", "Bytecode"] + _opcodes_all
|
|
del _opcodes_all
|
|
|
|
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
|
|
classmethod, staticmethod, type)
|
|
|
|
FORMAT_VALUE = opmap['FORMAT_VALUE']
|
|
|
|
def _try_compile(source, name):
|
|
"""Attempts to compile the given source, first as an expression and
|
|
then as a statement if the first approach fails.
|
|
|
|
Utility function to accept strings in functions that otherwise
|
|
expect code objects
|
|
"""
|
|
try:
|
|
c = compile(source, name, 'eval')
|
|
except SyntaxError:
|
|
c = compile(source, name, 'exec')
|
|
return c
|
|
|
|
def dis(x=None, *, file=None, depth=None):
|
|
"""Disassemble classes, methods, functions, and other compiled objects.
|
|
|
|
With no argument, disassemble the last traceback.
|
|
|
|
Compiled objects currently include generator objects, async generator
|
|
objects, and coroutine objects, all of which store their code object
|
|
in a special attribute.
|
|
"""
|
|
if x is None:
|
|
distb(file=file)
|
|
return
|
|
# Extract functions from methods.
|
|
if hasattr(x, '__func__'):
|
|
x = x.__func__
|
|
# Extract compiled code objects from...
|
|
if hasattr(x, '__code__'): # ...a function, or
|
|
x = x.__code__
|
|
elif hasattr(x, 'gi_code'): #...a generator object, or
|
|
x = x.gi_code
|
|
elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or
|
|
x = x.ag_code
|
|
elif hasattr(x, 'cr_code'): #...a coroutine.
|
|
x = x.cr_code
|
|
# Perform the disassembly.
|
|
if hasattr(x, '__dict__'): # Class or module
|
|
items = sorted(x.__dict__.items())
|
|
for name, x1 in items:
|
|
if isinstance(x1, _have_code):
|
|
print("Disassembly of %s:" % name, file=file)
|
|
try:
|
|
dis(x1, file=file, depth=depth)
|
|
except TypeError as msg:
|
|
print("Sorry:", msg, file=file)
|
|
print(file=file)
|
|
elif hasattr(x, 'co_code'): # Code object
|
|
_disassemble_recursive(x, file=file, depth=depth)
|
|
elif isinstance(x, (bytes, bytearray)): # Raw bytecode
|
|
_disassemble_bytes(x, file=file)
|
|
elif isinstance(x, str): # Source code
|
|
_disassemble_str(x, file=file, depth=depth)
|
|
else:
|
|
raise TypeError("don't know how to disassemble %s objects" %
|
|
type(x).__name__)
|
|
|
|
def distb(tb=None, *, file=None):
|
|
"""Disassemble a traceback (default: last traceback)."""
|
|
if tb is None:
|
|
try:
|
|
tb = sys.last_traceback
|
|
except AttributeError:
|
|
raise RuntimeError("no last traceback to disassemble") from None
|
|
while tb.tb_next: tb = tb.tb_next
|
|
disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
|
|
|
|
# The inspect module interrogates this dictionary to build its
|
|
# list of CO_* constants. It is also used by pretty_flags to
|
|
# turn the co_flags field into a human readable list.
|
|
COMPILER_FLAG_NAMES = {
|
|
1: "OPTIMIZED",
|
|
2: "NEWLOCALS",
|
|
4: "VARARGS",
|
|
8: "VARKEYWORDS",
|
|
16: "NESTED",
|
|
32: "GENERATOR",
|
|
64: "NOFREE",
|
|
128: "COROUTINE",
|
|
256: "ITERABLE_COROUTINE",
|
|
512: "ASYNC_GENERATOR",
|
|
}
|
|
|
|
def pretty_flags(flags):
|
|
"""Return pretty representation of code flags."""
|
|
names = []
|
|
for i in range(32):
|
|
flag = 1<<i
|
|
if flags & flag:
|
|
names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
|
|
flags ^= flag
|
|
if not flags:
|
|
break
|
|
else:
|
|
names.append(hex(flags))
|
|
return ", ".join(names)
|
|
|
|
def _get_code_object(x):
|
|
"""Helper to handle methods, compiled or raw code objects, and strings."""
|
|
# Extract functions from methods.
|
|
if hasattr(x, '__func__'):
|
|
x = x.__func__
|
|
# Extract compiled code objects from...
|
|
if hasattr(x, '__code__'): # ...a function, or
|
|
x = x.__code__
|
|
elif hasattr(x, 'gi_code'): #...a generator object, or
|
|
x = x.gi_code
|
|
elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or
|
|
x = x.ag_code
|
|
elif hasattr(x, 'cr_code'): #...a coroutine.
|
|
x = x.cr_code
|
|
# Handle source code.
|
|
if isinstance(x, str):
|
|
x = _try_compile(x, "<disassembly>")
|
|
# By now, if we don't have a code object, we can't disassemble x.
|
|
if hasattr(x, 'co_code'):
|
|
return x
|
|
raise TypeError("don't know how to disassemble %s objects" %
|
|
type(x).__name__)
|
|
|
|
def code_info(x):
|
|
"""Formatted details of methods, functions, or code."""
|
|
return _format_code_info(_get_code_object(x))
|
|
|
|
def _format_code_info(co):
|
|
lines = []
|
|
lines.append("Name: %s" % co.co_name)
|
|
lines.append("Filename: %s" % co.co_filename)
|
|
lines.append("Argument count: %s" % co.co_argcount)
|
|
lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
|
|
lines.append("Number of locals: %s" % co.co_nlocals)
|
|
lines.append("Stack size: %s" % co.co_stacksize)
|
|
lines.append("Flags: %s" % pretty_flags(co.co_flags))
|
|
if co.co_consts:
|
|
lines.append("Constants:")
|
|
for i_c in enumerate(co.co_consts):
|
|
lines.append("%4d: %r" % i_c)
|
|
if co.co_names:
|
|
lines.append("Names:")
|
|
for i_n in enumerate(co.co_names):
|
|
lines.append("%4d: %s" % i_n)
|
|
if co.co_varnames:
|
|
lines.append("Variable names:")
|
|
for i_n in enumerate(co.co_varnames):
|
|
lines.append("%4d: %s" % i_n)
|
|
if co.co_freevars:
|
|
lines.append("Free variables:")
|
|
for i_n in enumerate(co.co_freevars):
|
|
lines.append("%4d: %s" % i_n)
|
|
if co.co_cellvars:
|
|
lines.append("Cell variables:")
|
|
for i_n in enumerate(co.co_cellvars):
|
|
lines.append("%4d: %s" % i_n)
|
|
return "\n".join(lines)
|
|
|
|
def show_code(co, *, file=None):
|
|
"""Print details of methods, functions, or code to *file*.
|
|
|
|
If *file* is not provided, the output is printed on stdout.
|
|
"""
|
|
print(code_info(co), file=file)
|
|
|
|
_Instruction = collections.namedtuple("_Instruction",
|
|
"opname opcode arg argval argrepr offset starts_line is_jump_target")
|
|
|
|
_Instruction.opname.__doc__ = "Human readable name for operation"
|
|
_Instruction.opcode.__doc__ = "Numeric code for operation"
|
|
_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
|
|
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
|
|
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
|
|
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
|
|
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
|
|
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
|
|
|
|
_OPNAME_WIDTH = 20
|
|
_OPARG_WIDTH = 5
|
|
|
|
class Instruction(_Instruction):
|
|
"""Details for a bytecode operation
|
|
|
|
Defined fields:
|
|
opname - human readable name for operation
|
|
opcode - numeric code for operation
|
|
arg - numeric argument to operation (if any), otherwise None
|
|
argval - resolved arg value (if known), otherwise same as arg
|
|
argrepr - human readable description of operation argument
|
|
offset - start index of operation within bytecode sequence
|
|
starts_line - line started by this opcode (if any), otherwise None
|
|
is_jump_target - True if other code jumps to here, otherwise False
|
|
"""
|
|
|
|
def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
|
|
"""Format instruction details for inclusion in disassembly output
|
|
|
|
*lineno_width* sets the width of the line number field (0 omits it)
|
|
*mark_as_current* inserts a '-->' marker arrow as part of the line
|
|
*offset_width* sets the width of the instruction offset field
|
|
"""
|
|
fields = []
|
|
# Column: Source code line number
|
|
if lineno_width:
|
|
if self.starts_line is not None:
|
|
lineno_fmt = "%%%dd" % lineno_width
|
|
fields.append(lineno_fmt % self.starts_line)
|
|
else:
|
|
fields.append(' ' * lineno_width)
|
|
# Column: Current instruction indicator
|
|
if mark_as_current:
|
|
fields.append('-->')
|
|
else:
|
|
fields.append(' ')
|
|
# Column: Jump target marker
|
|
if self.is_jump_target:
|
|
fields.append('>>')
|
|
else:
|
|
fields.append(' ')
|
|
# Column: Instruction offset from start of code sequence
|
|
fields.append(repr(self.offset).rjust(offset_width))
|
|
# Column: Opcode name
|
|
fields.append(self.opname.ljust(_OPNAME_WIDTH))
|
|
# Column: Opcode argument
|
|
if self.arg is not None:
|
|
fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
|
|
# Column: Opcode argument details
|
|
if self.argrepr:
|
|
fields.append('(' + self.argrepr + ')')
|
|
return ' '.join(fields).rstrip()
|
|
|
|
|
|
def get_instructions(x, *, first_line=None):
|
|
"""Iterator for the opcodes in methods, functions or code
|
|
|
|
Generates a series of Instruction named tuples giving the details of
|
|
each operations in the supplied code.
|
|
|
|
If *first_line* is not None, it indicates the line number that should
|
|
be reported for the first source line in the disassembled code.
|
|
Otherwise, the source line information (if any) is taken directly from
|
|
the disassembled code object.
|
|
"""
|
|
co = _get_code_object(x)
|
|
cell_names = co.co_cellvars + co.co_freevars
|
|
linestarts = dict(findlinestarts(co))
|
|
if first_line is not None:
|
|
line_offset = first_line - co.co_firstlineno
|
|
else:
|
|
line_offset = 0
|
|
return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
|
|
co.co_consts, cell_names, linestarts,
|
|
line_offset)
|
|
|
|
def _get_const_info(const_index, const_list):
|
|
"""Helper to get optional details about const references
|
|
|
|
Returns the dereferenced constant and its repr if the constant
|
|
list is defined.
|
|
Otherwise returns the constant index and its repr().
|
|
"""
|
|
argval = const_index
|
|
if const_list is not None:
|
|
argval = const_list[const_index]
|
|
return argval, repr(argval)
|
|
|
|
def _get_name_info(name_index, name_list):
|
|
"""Helper to get optional details about named references
|
|
|
|
Returns the dereferenced name as both value and repr if the name
|
|
list is defined.
|
|
Otherwise returns the name index and its repr().
|
|
"""
|
|
argval = name_index
|
|
if name_list is not None:
|
|
argval = name_list[name_index]
|
|
argrepr = argval
|
|
else:
|
|
argrepr = repr(argval)
|
|
return argval, argrepr
|
|
|
|
|
|
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
|
|
cells=None, linestarts=None, line_offset=0):
|
|
"""Iterate over the instructions in a bytecode string.
|
|
|
|
Generates a sequence of Instruction namedtuples giving the details of each
|
|
opcode. Additional information about the code's runtime environment
|
|
(e.g. variable names, constants) can be specified using optional
|
|
arguments.
|
|
|
|
"""
|
|
labels = findlabels(code)
|
|
starts_line = None
|
|
for offset, op, arg in _unpack_opargs(code):
|
|
if linestarts is not None:
|
|
starts_line = linestarts.get(offset, None)
|
|
if starts_line is not None:
|
|
starts_line += line_offset
|
|
is_jump_target = offset in labels
|
|
argval = None
|
|
argrepr = ''
|
|
if arg is not None:
|
|
# Set argval to the dereferenced value of the argument when
|
|
# available, and argrepr to the string representation of argval.
|
|
# _disassemble_bytes needs the string repr of the
|
|
# raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
|
|
argval = arg
|
|
if op in hasconst:
|
|
argval, argrepr = _get_const_info(arg, constants)
|
|
elif op in hasname:
|
|
argval, argrepr = _get_name_info(arg, names)
|
|
elif op in hasjrel:
|
|
argval = offset + 2 + arg
|
|
argrepr = "to " + repr(argval)
|
|
elif op in haslocal:
|
|
argval, argrepr = _get_name_info(arg, varnames)
|
|
elif op in hascompare:
|
|
argval = cmp_op[arg]
|
|
argrepr = argval
|
|
elif op in hasfree:
|
|
argval, argrepr = _get_name_info(arg, cells)
|
|
elif op == FORMAT_VALUE:
|
|
argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
|
|
argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
|
|
if argval[1]:
|
|
if argrepr:
|
|
argrepr += ', '
|
|
argrepr += 'with format'
|
|
yield Instruction(opname[op], op,
|
|
arg, argval, argrepr,
|
|
offset, starts_line, is_jump_target)
|
|
|
|
def disassemble(co, lasti=-1, *, file=None):
|
|
"""Disassemble a code object."""
|
|
cell_names = co.co_cellvars + co.co_freevars
|
|
linestarts = dict(findlinestarts(co))
|
|
_disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
|
|
co.co_consts, cell_names, linestarts, file=file)
|
|
|
|
def _disassemble_recursive(co, *, file=None, depth=None):
|
|
disassemble(co, file=file)
|
|
if depth is None or depth > 0:
|
|
if depth is not None:
|
|
depth = depth - 1
|
|
for x in co.co_consts:
|
|
if hasattr(x, 'co_code'):
|
|
print(file=file)
|
|
print("Disassembly of %r:" % (x,), file=file)
|
|
_disassemble_recursive(x, file=file, depth=depth)
|
|
|
|
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
|
|
constants=None, cells=None, linestarts=None,
|
|
*, file=None, line_offset=0):
|
|
# Omit the line number column entirely if we have no line number info
|
|
show_lineno = linestarts is not None
|
|
if show_lineno:
|
|
maxlineno = max(linestarts.values()) + line_offset
|
|
if maxlineno >= 1000:
|
|
lineno_width = len(str(maxlineno))
|
|
else:
|
|
lineno_width = 3
|
|
else:
|
|
lineno_width = 0
|
|
maxoffset = len(code) - 2
|
|
if maxoffset >= 10000:
|
|
offset_width = len(str(maxoffset))
|
|
else:
|
|
offset_width = 4
|
|
for instr in _get_instructions_bytes(code, varnames, names,
|
|
constants, cells, linestarts,
|
|
line_offset=line_offset):
|
|
new_source_line = (show_lineno and
|
|
instr.starts_line is not None and
|
|
instr.offset > 0)
|
|
if new_source_line:
|
|
print(file=file)
|
|
is_current_instr = instr.offset == lasti
|
|
print(instr._disassemble(lineno_width, is_current_instr, offset_width),
|
|
file=file)
|
|
|
|
def _disassemble_str(source, **kwargs):
|
|
"""Compile the source string, then disassemble the code object."""
|
|
_disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)
|
|
|
|
disco = disassemble # XXX For backwards compatibility
|
|
|
|
def _unpack_opargs(code):
|
|
extended_arg = 0
|
|
for i in range(0, len(code), 2):
|
|
op = code[i]
|
|
if op >= HAVE_ARGUMENT:
|
|
arg = code[i+1] | extended_arg
|
|
extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
|
|
else:
|
|
arg = None
|
|
yield (i, op, arg)
|
|
|
|
def findlabels(code):
|
|
"""Detect all offsets in a byte code which are jump targets.
|
|
|
|
Return the list of offsets.
|
|
|
|
"""
|
|
labels = []
|
|
for offset, op, arg in _unpack_opargs(code):
|
|
if arg is not None:
|
|
if op in hasjrel:
|
|
label = offset + 2 + arg
|
|
elif op in hasjabs:
|
|
label = arg
|
|
else:
|
|
continue
|
|
if label not in labels:
|
|
labels.append(label)
|
|
return labels
|
|
|
|
def findlinestarts(code):
|
|
"""Find the offsets in a byte code which are start of lines in the source.
|
|
|
|
Generate pairs (offset, lineno) as described in Python/compile.c.
|
|
|
|
"""
|
|
byte_increments = code.co_lnotab[0::2]
|
|
line_increments = code.co_lnotab[1::2]
|
|
|
|
lastlineno = None
|
|
lineno = code.co_firstlineno
|
|
addr = 0
|
|
for byte_incr, line_incr in zip(byte_increments, line_increments):
|
|
if byte_incr:
|
|
if lineno != lastlineno:
|
|
yield (addr, lineno)
|
|
lastlineno = lineno
|
|
addr += byte_incr
|
|
if line_incr >= 0x80:
|
|
# line_increments is an array of 8-bit signed integers
|
|
line_incr -= 0x100
|
|
lineno += line_incr
|
|
if lineno != lastlineno:
|
|
yield (addr, lineno)
|
|
|
|
class Bytecode:
|
|
"""The bytecode operations of a piece of code
|
|
|
|
Instantiate this with a function, method, other compiled object, string of
|
|
code, or a code object (as returned by compile()).
|
|
|
|
Iterating over this yields the bytecode operations as Instruction instances.
|
|
"""
|
|
def __init__(self, x, *, first_line=None, current_offset=None):
|
|
self.codeobj = co = _get_code_object(x)
|
|
if first_line is None:
|
|
self.first_line = co.co_firstlineno
|
|
self._line_offset = 0
|
|
else:
|
|
self.first_line = first_line
|
|
self._line_offset = first_line - co.co_firstlineno
|
|
self._cell_names = co.co_cellvars + co.co_freevars
|
|
self._linestarts = dict(findlinestarts(co))
|
|
self._original_object = x
|
|
self.current_offset = current_offset
|
|
|
|
def __iter__(self):
|
|
co = self.codeobj
|
|
return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
|
|
co.co_consts, self._cell_names,
|
|
self._linestarts,
|
|
line_offset=self._line_offset)
|
|
|
|
def __repr__(self):
|
|
return "{}({!r})".format(self.__class__.__name__,
|
|
self._original_object)
|
|
|
|
@classmethod
|
|
def from_traceback(cls, tb):
|
|
""" Construct a Bytecode from the given traceback """
|
|
while tb.tb_next:
|
|
tb = tb.tb_next
|
|
return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)
|
|
|
|
def info(self):
|
|
"""Return formatted information about the code object."""
|
|
return _format_code_info(self.codeobj)
|
|
|
|
def dis(self):
|
|
"""Return a formatted view of the bytecode operations."""
|
|
co = self.codeobj
|
|
if self.current_offset is not None:
|
|
offset = self.current_offset
|
|
else:
|
|
offset = -1
|
|
with io.StringIO() as output:
|
|
_disassemble_bytes(co.co_code, varnames=co.co_varnames,
|
|
names=co.co_names, constants=co.co_consts,
|
|
cells=self._cell_names,
|
|
linestarts=self._linestarts,
|
|
line_offset=self._line_offset,
|
|
file=output,
|
|
lasti=offset)
|
|
return output.getvalue()
|
|
|
|
|
|
def _test():
|
|
"""Simple test program to disassemble a file."""
|
|
import argparse
|
|
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
|
|
args = parser.parse_args()
|
|
with args.infile as infile:
|
|
source = infile.read()
|
|
code = compile(source, args.infile.name, "exec")
|
|
dis(code)
|
|
|
|
if __name__ == "__main__":
|
|
_test()
|