Skip to content

Commit

Permalink
Merge branch 'master' of github.com:rocky/python-xdis
Browse files Browse the repository at this point in the history
  • Loading branch information
rocky committed Jun 21, 2020
2 parents 1fea884 + e6e146f commit 1f796dc
Showing 1 changed file with 334 additions and 0 deletions.
334 changes: 334 additions & 0 deletions xdis/disasm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,334 @@
# Copyright (c) 2016-2018, 2020 by Rocky Bernstein
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
CPython version-independent disassembly routines
"""

# Note: we tend to eschew new Python 3 things, and even future
# imports, so that this can run on older Pythons. This is
# intended to be a more cross-version Python program.

import datetime, os, re, sys, types
from collections import deque

import xdis

from xdis import IS_PYPY, PYTHON_VERSION
from xdis.bytecode import Bytecode
from xdis.codetype import iscode, codeType2Portable
from xdis.load import check_object_path, load_module
from xdis.magics import PYTHON_MAGIC_INT
from xdis.cross_dis import format_code_info
from xdis.version import VERSION
from xdis.op_imports import op_imports


def get_opcode(version, is_pypy):
    """Return the ``op_imports`` entry for a bytecode version.

    ``version`` may be a list or tuple (or a subclass of either) of
    version components, which are joined with "." to form the lookup
    key; anything else is turned into the key with ``str()``. When
    ``is_pypy`` is true, "pypy" is appended to the key.

    Raises TypeError when ``op_imports`` has no entry for the key.
    """
    # Set up disassembler with the right opcodes.
    # isinstance() rather than an exact type() test so that tuple/list
    # subclasses (e.g. named tuples) are joined too.
    if isinstance(version, (list, tuple)):
        version = ".".join([str(x) for x in version])
    lookup = str(version)
    if is_pypy:
        lookup += "pypy"
    if lookup in op_imports:
        return op_imports[lookup]
    if is_pypy:
        pypy_str = " for pypy"
    else:
        pypy_str = ""
    raise TypeError("%s is not a Python version%s I know about" % (version, pypy_str))


def show_module_header(
    bytecode_version,
    co,
    timestamp,
    out=sys.stdout,
    is_pypy=False,
    magic_int=None,
    source_size=None,
    sip_hash=None,
    header=True,
    show_filename=True,
):
    """Write "#"-prefixed comment lines describing a bytecode module.

    bytecode_version -- version of the bytecode; shown in the banner and
                        compared against 3.0 for the conversion warning.
    co               -- code object; only ``co_filename`` is read, and only
                        when ``show_filename`` is true.
    timestamp        -- if not None, written both as a number and as a
                        formatted local date/time.
    out              -- object with a ``write`` method; a false value
                        means ``sys.stdout``.
    is_pypy          -- true if the bytecode is to be labelled as PyPy.
    magic_int        -- if true, shown in parentheses after the version;
                        otherwise empty parentheses are written.
    source_size      -- if not None, written as the source size line.
    sip_hash         -- if not None, written in hex as the SipHash line.
    header           -- when true, write the pydisasm/interpreter banner.
    show_filename    -- when true, write the embedded file name line.
    """

    stream = out or sys.stdout

    # "PyPy " prefix for the bytecode being described ...
    co_pypy_str = ""
    if is_pypy:
        co_pypy_str = "PyPy "

    # ... and for the interpreter running this program.
    run_pypy_str = ""
    if IS_PYPY:
        run_pypy_str = "PyPy "

    if header:
        if magic_int:
            magic_str = str(magic_int)
        else:
            magic_str = ""
        banner = (
            "# pydisasm version %s\n# %sPython bytecode %s%s"
            "\n# Disassembled from %sPython %s\n"
        )
        # sys.version may span several lines; keep each one commented out.
        interpreter_version = "\n# ".join(sys.version.split("\n"))
        stream.write(
            banner
            % (
                VERSION,
                co_pypy_str,
                bytecode_version,
                " (%s)" % magic_str,
                run_pypy_str,
                interpreter_version,
            )
        )

    if PYTHON_VERSION < 3.0 and bytecode_version >= 3.0:
        stream.write("\n## **Warning** bytecode strings will be converted to strings.\n")
        stream.write("## To avoid loss, run this from Python 3.0 or greater\n\n")

    if timestamp is not None:
        when = datetime.datetime.fromtimestamp(timestamp)
        stream.write("# Timestamp in code: %d" % timestamp)
        stream.write(when.strftime(" (%Y-%m-%d %H:%M:%S)\n"))

    if source_size is not None:
        stream.write("# Source code size mod 2**32: %d bytes\n" % source_size)

    if sip_hash is not None:
        stream.write("# SipHash: 0x%x\n" % sip_hash)

    if show_filename:
        stream.write("# Embedded file name: %s\n" % co.co_filename)


def disco(
    bytecode_version,
    co,
    timestamp,
    out=sys.stdout,
    is_pypy=False,
    magic_int=None,
    source_size=None,
    sip_hash=None,
    asm_format="std",
    dup_lines=False,
):
    """
    Disassembles the code object 'co', together with the code objects
    nested inside it, writing the result to 'out' (sys.stdout when 'out'
    is false).

    A module header is written first, without the embedded file name.
    With asm_format "xasm" the code objects are emitted by
    disco_loop_asm_format(); any other format goes through disco_loop().

    NOTE(review): the 'dup_lines' argument is accepted but not consulted;
    disco_loop() is always called with dup_lines=True. Confirm whether
    that is intended before forwarding the parameter.
    """

    assert iscode(co)

    show_module_header(
        bytecode_version,
        co,
        timestamp,
        out,
        is_pypy,
        magic_int,
        source_size,
        sip_hash,
        header=True,
        show_filename=False,
    )

    # store final output stream for case of error
    stream = out or sys.stdout

    if co.co_filename and asm_format != "xasm":
        stream.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)

    if asm_format == "xasm":
        # Fresh name map and function set for this top-level code object.
        disco_loop_asm_format(opc, bytecode_version, co, stream, {}, set([]))
        return

    pending = deque([co])
    disco_loop(
        opc,
        bytecode_version,
        pending,
        stream,
        asm_format=asm_format,
        dup_lines=True,
    )


def disco_loop(opc, version, queue, real_out, dup_lines=False, asm_format="std"):
    """Disassemble every code object in 'queue', front to back.

    Code objects found in the co_consts of the one being disassembled
    are appended to the queue, so they come out in the order in which
    they were first encountered. That order is not amenable to an
    assembler, which wants code objects defined before the functions
    that use them. On the other hand this is not recursive, and will
    overall lead to less memory consumption at run time.

    Each code object whose name is neither "<module>" nor "?" gets its
    format_code_info() summary written before its listing.
    """

    while queue:
        code = queue.popleft()
        if code.co_name not in ("<module>", "?"):
            real_out.write("\n" + format_code_info(code, version) + "\n")

        listing = Bytecode(code, opc, dup_lines=dup_lines).dis(asm_format=asm_format)
        real_out.write(listing + "\n")

        # Schedule nested code objects; they are handled on later iterations.
        queue.extend(const for const in code.co_consts if iscode(const))


def code_uniquify(basename, co_code):
    """Return 'basename' followed by "_0x" and the hex id() of 'co_code'.

    The suffix comes from the object's identity, not its contents, so it
    differs between distinct objects that are alive at the same time.
    """
    # FIXME: better would be a hash of the co_code
    identity = id(co_code)
    return "%s_0x%x" % (basename, identity)


def disco_loop_asm_format(opc, version, co, real_out, fn_name_map, all_fns):
    """Produces disassembly in a format more conducive to
    automatic assembly by producing inner modules before they are
    used by outer ones. Since this is recursive, we'll
    use more stack space at runtime.

    opc         -- opcode information handed to Bytecode().
    version     -- bytecode version handed to format_code_info().
    co          -- code object to emit; its nested code constants are
                   emitted first.
    real_out    -- object with a ``write`` method receiving the output.
    fn_name_map -- dict, updated in place, from generated unique names
                   to the base name they were derived from.
    all_fns     -- set, updated in place, of code names already emitted.
    """

    # presumably yields a mutable, portable code object: co_name and
    # co_consts are assigned below and freeze() is called -- TODO confirm
    co = codeType2Portable(co)
    co_name = co.co_name
    mapped_name = fn_name_map.get(co_name, co_name)

    # Emit nested code objects first, collecting the (possibly renamed)
    # constants so that this code object refers to the renamed versions.
    new_consts = []
    for c in co.co_consts:
        if iscode(c):
            if isinstance(c, types.CodeType):
                c_compat = codeType2Portable(c)
            else:
                c_compat = c

            disco_loop_asm_format(
                opc, version, c_compat, real_out, fn_name_map, all_fns
            )

            # Pull the name written inside <...> out of the constant's
            # string form, e.g. "<lambda>" gives "lambda".
            m = re.match(".* object <(.+)> at", str(c))
            if m:
                basename = m.group(1)
                if basename != "module":
                    # NOTE(review): this rebinds the enclosing code's
                    # mapped_name, which is still read further down --
                    # confirm that is intended.
                    mapped_name = code_uniquify(basename, c.co_code)
                    c_compat.co_name = mapped_name
            c_compat.freeze()
            new_consts.append(c_compat)
        else:
            new_consts.append(c)
        pass
    co.co_consts = new_consts

    # Names of the form <...> and names that were already emitted get a
    # uniquified replacement.
    m = re.match("^<(.+)>$", co.co_name)
    if m or co_name in all_fns:
        if co_name in all_fns:
            basename = co_name
        else:
            basename = m.group(1)
        if basename != "module":
            mapped_name = code_uniquify(basename, co.co_code)
            co_name = mapped_name
            assert mapped_name not in fn_name_map
        fn_name_map[mapped_name] = basename
        co.co_name = mapped_name
        pass
    elif co_name in fn_name_map:
        # FIXME: better would be a hash of the co_code
        mapped_name = code_uniquify(co_name, co.co_code)
        fn_name_map[mapped_name] = co_name
        co.co_name = mapped_name
        pass

    co = co.freeze()
    all_fns.add(co_name)
    if co.co_name != "<module>" or co.co_filename:
        real_out.write("\n" + format_code_info(co, version, mapped_name) + "\n")

    bytecode = Bytecode(co, opc, dup_lines=True)
    real_out.write(bytecode.dis(asm_format="asm") + "\n")


def disassemble_file(
    filename, outstream=sys.stdout, asm_format="std"
):
    """
    Disassemble a Python byte-code file (.pyc).

    If given a Python source file (".py") file, we'll
    try to find the corresponding compiled object.
    If that fails we'll compile internally for the Python version
    currently running.

    filename   -- path handed to check_object_path().
    outstream  -- object with a ``write`` method receiving the output.
    asm_format -- "header" writes only the module header; any other
                  value is passed on to disco().

    Returns the tuple
    (filename, co, version, timestamp, magic_int, is_pypy, source_size, sip_hash),
    where filename is the path check_object_path() returned when loading
    succeeded and the path passed in otherwise.

    An exception raised by check_object_path() is propagated unchanged.
    """
    pyc_filename = None
    try:
        # FIXME: add whether we want PyPy
        pyc_filename = check_object_path(filename)
        version, timestamp, magic_int, co, is_pypy, source_size, sip_hash = load_module(pyc_filename)
    except Exception:
        # "except Exception" rather than a bare "except" so that
        # KeyboardInterrupt and SystemExit are not mistaken for a load
        # failure and turned into a compile of the source.

        # Hack alert: we're using pyc_filename set as a proxy for whether the filename exists.
        # check_object_path() will succeed if the file exists.
        if pyc_filename is None:
            raise
        stat = os.stat(filename)
        # Close the source file deterministically instead of leaving it
        # to the garbage collector. try/finally rather than "with" in
        # keeping with this module's aim of running on older Pythons.
        source_file = open(filename, "r")
        try:
            source = source_file.read()
        finally:
            source_file.close()
        co = compile(source, filename, "exec")
        is_pypy = IS_PYPY
        magic_int = PYTHON_MAGIC_INT
        sip_hash = 0
        source_size = stat.st_size
        timestamp = stat.st_mtime
        version = PYTHON_VERSION
    else:
        filename = pyc_filename

    if asm_format == "header":
        show_module_header(
            version,
            co,
            timestamp,
            outstream,
            is_pypy,
            magic_int,
            source_size,
            sip_hash,
            show_filename=True,
        )
    else:
        disco(
            bytecode_version=version,
            co=co,
            timestamp=timestamp,
            out=outstream,
            is_pypy=is_pypy,
            magic_int=magic_int,
            source_size=source_size,
            sip_hash=sip_hash,
            asm_format=asm_format,
        )
    # print co.co_filename
    return filename, co, version, timestamp, magic_int, is_pypy, source_size, sip_hash


def _test():
    """Simple test program to disassemble a file.

    With one command-line argument, that file is disassembled. With no
    arguments, and when xdis.PYTHON3 is true, this source file itself is
    disassembled. Anything else prints a usage line to stderr and exits
    with status 2.
    """
    arg_count = len(sys.argv)
    if arg_count == 2:
        target = sys.argv[1]
    elif arg_count == 1 and xdis.PYTHON3:
        target = __file__
    else:
        sys.stderr.write("usage: %s [-|CPython compiled file]\n" % __file__)
        sys.exit(2)
    disassemble_file(target)


if __name__ == "__main__":
    _test()

0 comments on commit 1f796dc

Please sign in to comment.