From e6e146f84fbfffce419616bb0cbc31ed1d9608b8 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 21 Jun 2020 07:58:09 -0400 Subject: [PATCH] Redo disassembler options handling -- * --show-bytes and --asm options removed. Use --format (-F) instead. * asm_fmt is now a string choice. * Don't show integer arg value when there is something more useful to show. Note that -F=bytes will show this separately --- xdis/__init__.py | 2 +- xdis/bin/pydisasm.py | 23 +-- xdis/bytecode.py | 9 +- xdis/instruction.py | 101 ++++++------- xdis/main.py | 347 ------------------------------------------- xdis/std.py | 2 +- xdis/version.py | 2 +- 7 files changed, 65 insertions(+), 421 deletions(-) delete mode 100644 xdis/main.py diff --git a/xdis/__init__.py b/xdis/__init__.py index e915456b..f7db0c0c 100644 --- a/xdis/__init__.py +++ b/xdis/__init__.py @@ -87,7 +87,7 @@ lineoffsets_in_module, ) -from xdis.main import ( +from xdis.disasm import ( get_opcode, show_module_header, disco_loop, diff --git a/xdis/bin/pydisasm.py b/xdis/bin/pydisasm.py index ed88470f..de149843 100644 --- a/xdis/bin/pydisasm.py +++ b/xdis/bin/pydisasm.py @@ -1,5 +1,5 @@ # Mode: -*- python -*- -# Copyright (c) 2015-2019 by Rocky Bernstein +# Copyright (c) 2015-2020 by Rocky Bernstein # # Note: we can't start with #! because setup.py bdist_wheel will look for that # and change that into something that's not portable. Thank you, Python! 
@@ -21,23 +21,14 @@ @click.command() @click.option( - "--asm/--noasm", - default=False, - help="Produce output suitable for the xasm assembler", -) -@click.option( - "--show-bytes/--noshow-bytes", - default=False, - help="include bytecode bytes in output", + "--format", + "-F", + type=click.Choice(["xasm", "bytes", "std", "extended", "header"], + case_sensitive=False), ) @click.version_option(version=VERSION) -@click.option( - "--header/--no-header", - default=False, - help="Show only the module header information", -) @click.argument("files", nargs=-1, type=click.Path(readable=True), required=True) -def main(asm, show_bytes, header, files): +def main(format, files): """Disassembles a Python bytecode file. We handle bytecode for virtually every release of Python and some releases of PyPy. @@ -72,7 +63,7 @@ def main(asm, show_bytes, header, files): ) continue - disassemble_file(path, sys.stdout, asm, header, show_bytes) + disassemble_file(path, sys.stdout, format) return diff --git a/xdis/bytecode.py b/xdis/bytecode.py index e4d23da8..e29bd92f 100644 --- a/xdis/bytecode.py +++ b/xdis/bytecode.py @@ -269,7 +269,7 @@ def info(self): """Return formatted information about the code object.""" return format_code_info(self.codeobj, self.opc.version) - def dis(self, asm_format=False, show_bytes=False): + def dis(self, asm_format="std"): """Return a formatted view of the bytecode operations.""" co = self.codeobj if self.current_offset is not None: @@ -291,8 +291,7 @@ def dis(self, asm_format=False, show_bytes=False): line_offset=self._line_offset, file=output, lasti=offset, - asm_format=asm_format, - show_bytes=show_bytes) + asm_format=asm_format) return output.getvalue() def distb(self, tb=None): @@ -308,7 +307,7 @@ def distb(self, tb=None): def disassemble_bytes(self, code, lasti=-1, varnames=None, names=None, constants=None, cells=None, linestarts=None, file=sys.stdout, line_offset=0, - asm_format=False, show_bytes=False): + asm_format="std"): # Omit the line number 
column entirely if we have no line number info show_lineno = linestarts is not None # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000? @@ -323,7 +322,7 @@ def disassemble_bytes(self, code, lasti=-1, varnames=None, names=None, file.write("\n") is_current_instr = instr.offset == lasti file.write(instr.disassemble(lineno_width, is_current_instr, - asm_format, show_bytes) + asm_format) + "\n") pass return diff --git a/xdis/instruction.py b/xdis/instruction.py index 719f9369..4b91a87b 100644 --- a/xdis/instruction.py +++ b/xdis/instruction.py @@ -23,9 +23,11 @@ from collections import namedtuple -_Instruction = namedtuple("_Instruction", - "opname opcode optype inst_size arg argval argrepr has_arg offset starts_line is_jump_target has_extended_arg") - # "opname opcode optype inst_size arg argval argrepr has_arg offset starts_line is_jump_target has_extended_arg fallthrough") +_Instruction = namedtuple( + "_Instruction", + "opname opcode optype inst_size arg argval argrepr has_arg offset starts_line is_jump_target has_extended_arg", +) +# "opname opcode optype inst_size arg argval argrepr has_arg offset starts_line is_jump_target has_extended_arg fallthrough") try: _Instruction.opname.__doc__ = "Human readable name for operation" _Instruction.opcode.__doc__ = "Numeric code for operation" @@ -34,15 +36,22 @@ _Instruction.argrepr.__doc__ = "Human readable description of operation argument" _Instruction.has_arg.__doc__ = "True if instruction has an operand, otherwise False" _Instruction.offset.__doc__ = "Start index of operation within bytecode sequence" - _Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None" - _Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False" - _Instruction.has_extended_arg.__doc__ = "True there were EXTENDED_ARG opcodes before this, otherwise False" + _Instruction.starts_line.__doc__ = ( + "Line started by this opcode (if any), otherwise None" + ) + 
_Instruction.is_jump_target.__doc__ = ( + "True if other code jumps to here, otherwise False" + ) + _Instruction.has_extended_arg.__doc__ = ( + "True there were EXTENDED_ARG opcodes before this, otherwise False" + ) except: pass _OPNAME_WIDTH = 20 _OPARG_WIDTH = 6 + class Instruction(_Instruction): """Details for a bytecode operation @@ -71,70 +80,66 @@ class Instruction(_Instruction): instruction. Note conditionals are in this category, but returns, raise, and unconditional jumps are not """ + # FIXME: remove has_arg from initialization but keep it as a field. - def disassemble(self, lineno_width=3, - mark_as_current=False, - asm_format=False, - show_bytes=False): + def disassemble(self, lineno_width=3, mark_as_current=False, asm_format="std"): """Format instruction details for inclusion in disassembly output *lineno_width* sets the width of the line number field (0 omits it) *mark_as_current* inserts a '-->' marker arrow as part of the line """ fields = [] - if asm_format: - indexed_operand = set(['name', 'local', 'compare', 'free']) + indexed_operand = set(["name", "local", "compare", "free"]) # Column: Source code line number if lineno_width: if self.starts_line is not None: - if asm_format: + if asm_format == "xasm": lineno_fmt = "%%%dd:\n" % lineno_width fields.append(lineno_fmt % self.starts_line) - fields.append(' ' * (lineno_width)) + fields.append(" " * (lineno_width)) if self.is_jump_target: - fields.append(' ' * (lineno_width-1)) + fields.append(" " * (lineno_width - 1)) else: lineno_fmt = "%%%dd:" % lineno_width fields.append(lineno_fmt % self.starts_line) else: - fields.append(' ' * (lineno_width+1)) + fields.append(" " * (lineno_width + 1)) # Column: Current instruction indicator - if mark_as_current and not asm_format: - fields.append('-->') + if mark_as_current and asm_format != "xasm": + fields.append("-->") else: - fields.append(' ') + fields.append(" ") # Column: Jump target marker if self.is_jump_target: - if not asm_format: - 
fields.append('>>') + if asm_format != "xasm": + fields.append(">>") else: fields = ["L%d:\n" % self.offset] + fields if not self.starts_line: - fields.append(' ') + fields.append(" ") else: - fields.append(' ') + fields.append(" ") # Column: Instruction offset from start of code sequence if not asm_format: fields.append(repr(self.offset).rjust(4)) - if show_bytes: + if asm_format == "bytes": hex_bytecode = "|%02x" % self.opcode if self.inst_size == 1: # Not 3.6 or later - hex_bytecode += ' ' * (2*3) + hex_bytecode += " " * (2 * 3) if self.inst_size == 2: # Must by Python 3.6 or later if self.has_arg: hex_bytecode += " %02x" % (self.arg % 256) - else : - hex_bytecode += ' 00' + else: + hex_bytecode += " 00" elif self.inst_size == 3: # Not 3.6 or later - hex_bytecode += " %02x %02x" % ( - (self.arg >> 8, self.arg % 256)) + hex_bytecode += " %02x %02x" % divmod(self.arg, 256) - fields.append(hex_bytecode + '|') + fields.append(hex_bytecode + "|") # Column: Opcode name fields.append(self.opname.ljust(_OPNAME_WIDTH)) @@ -142,33 +147,31 @@ def disassemble(self, lineno_width=3, # Column: Opcode argument if self.arg is not None: argrepr = self.argrepr - if asm_format: - if self.optype == 'jabs': - fields.append('L' + str(self.arg)) - elif self.optype == 'jrel': + if asm_format == "xasm": + if self.optype == "jabs": + fields.append("L" + str(self.arg)) + elif self.optype == "jrel": argval = self.offset + self.arg + self.inst_size - fields.append('L' + str(argval)) + fields.append("L" + str(argval)) elif self.optype in indexed_operand: - fields.append('(%s)' % argrepr) + fields.append("(%s)" % argrepr) argrepr = None - elif (self.optype == 'const' - and not re.search(r'\s', argrepr)): - fields.append('(%s)' % argrepr) + elif self.optype == "const" and not re.search(r"\s", argrepr): + fields.append("(%s)" % argrepr) argrepr = None - elif (self.optype == 'const' - and not re.search(r'\s', argrepr)): - fields.append('(%s)' % argrepr) + elif self.optype == "const" and not 
re.search(r"\s", argrepr): + fields.append("(%s)" % argrepr) argrepr = None else: fields.append(repr(self.arg)) - elif not (show_bytes and argrepr): - fields.append(repr(self.arg).rjust(_OPARG_WIDTH)) + elif not (asm_format != "compact" and argrepr): + fields.append(repr(self.arg)) # Column: Opcode argument details if argrepr: - fields.append('(%s)' % argrepr) + fields.append("(%s)" % argrepr) pass pass - return ' '.join(fields).rstrip() + return " ".join(fields).rstrip() def is_jump(self): """ @@ -180,10 +183,8 @@ def jumps_forward(self): """ Return True if instruction is jump backwards """ - return ( - self.is_jump() - and self.offset < self.argval - ) + return self.is_jump() and self.offset < self.argval + # if __name__ == '__main__': # pass diff --git a/xdis/main.py b/xdis/main.py deleted file mode 100644 index 381f74b3..00000000 --- a/xdis/main.py +++ /dev/null @@ -1,347 +0,0 @@ -# Copyright (c) 2016-2018, 2020 by Rocky Bernstein -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -""" -CPython independent disassembly routines - -There are two reasons we can't use Python's built-in routines -from dis. First, the bytecode we are extracting may be from a different -version of Python (different magic number) than the version of Python -that is doing the extraction. 
- -Second, we need structured instruction information for the -(de)-parsing step. Python 3.4 and up provides this, but we still do -want to run on Python 2.7. -""" - -# Note: we tend to eschew new Python 3 things, and even future -# imports so this can run on older Pythons. This is -# intended to be a more cross-version Python program - -import datetime, os, re, sys, types -from collections import deque - -import xdis - -from xdis import IS_PYPY, PYTHON_VERSION -from xdis.bytecode import Bytecode -from xdis.codetype import iscode, codeType2Portable -from xdis.load import check_object_path, load_module -from xdis.magics import PYTHON_MAGIC_INT -from xdis.cross_dis import format_code_info -from xdis.version import VERSION -from xdis.op_imports import op_imports - - -def get_opcode(version, is_pypy): - # Set up disassembler with the right opcodes - if type(version) in (list, tuple): - version = ".".join([str(x) for x in version]) - lookup = str(version) - if is_pypy: - lookup += "pypy" - if lookup in op_imports.keys(): - return op_imports[lookup] - if is_pypy: - pypy_str = " for pypy" - else: - pypy_str = "" - raise TypeError("%s is not a Python version%s I know about" % (version, pypy_str)) - - -def show_module_header( - bytecode_version, - co, - timestamp, - out=sys.stdout, - is_pypy=False, - magic_int=None, - source_size=None, - sip_hash=None, - header=True, - show_filename=True, -): - - real_out = out or sys.stdout - if is_pypy: - co_pypy_str = "PyPy " - else: - co_pypy_str = "" - - if IS_PYPY: - run_pypy_str = "PyPy " - else: - run_pypy_str = "" - - if header: - magic_str = "" - if magic_int: - magic_str = str(magic_int) - real_out.write( - ( - "# pydisasm version %s\n# %sPython bytecode %s%s" - "\n# Disassembled from %sPython %s\n" - ) - % ( - VERSION, - co_pypy_str, - bytecode_version, - " (%s)" % magic_str, - run_pypy_str, - "\n# ".join(sys.version.split("\n")), - ) - ) - if PYTHON_VERSION < 3.0 and bytecode_version >= 3.0: - real_out.write("\n## **Warning** 
bytecode strings will be converted to strings.\n") - real_out.write("## To avoid loss, run this from Python 3.0 or greater\n\n") - - if timestamp is not None: - value = datetime.datetime.fromtimestamp(timestamp) - real_out.write("# Timestamp in code: %d" % timestamp) - real_out.write(value.strftime(" (%Y-%m-%d %H:%M:%S)\n")) - if source_size is not None: - real_out.write("# Source code size mod 2**32: %d bytes\n" % source_size) - if sip_hash is not None: - real_out.write("# SipHash: 0x%x\n" % sip_hash) - if show_filename: - real_out.write("# Embedded file name: %s\n" % co.co_filename) - - -def disco( - bytecode_version, - co, - timestamp, - out=sys.stdout, - is_pypy=False, - magic_int=None, - source_size=None, - sip_hash=None, - header=True, - asm_format=False, - show_bytes=False, - dup_lines=False, -): - """ - diassembles and deparses a given code block 'co' - """ - - assert iscode(co) - - show_module_header( - bytecode_version, - co, - timestamp, - out, - is_pypy, - magic_int, - source_size, - sip_hash, - header, - show_filename=False, - ) - - # store final output stream for case of error - real_out = out or sys.stdout - - if co.co_filename and not asm_format: - real_out.write(format_code_info(co, bytecode_version) + "\n") - pass - - opc = get_opcode(bytecode_version, is_pypy) - - if asm_format: - disco_loop_asm_format(opc, bytecode_version, co, real_out, {}, set([])) - else: - queue = deque([co]) - disco_loop(opc, bytecode_version, queue, real_out, show_bytes=show_bytes, - dup_lines=True) - - -def disco_loop(opc, version, queue, real_out, dup_lines=False, show_bytes=False): - """Disassembles a queue of code objects. If we discover - another code object which will be found in co_consts, we add - the new code to the list. Note that the order of code discovery - is in the order of first encountered which is not amenable for - the format used by a disassembler where code objects should - be defined before using them in other functions. 
- However this is not recursive and will overall lead to less - memory consumption at run time. - """ - - while len(queue) > 0: - co = queue.popleft() - if co.co_name not in ("", "?"): - real_out.write("\n" + format_code_info(co, version) + "\n") - - bytecode = Bytecode(co, opc, dup_lines=dup_lines) - real_out.write(bytecode.dis(show_bytes=show_bytes) + "\n") - - for c in co.co_consts: - if iscode(c): - queue.append(c) - pass - pass - - -def code_uniquify(basename, co_code): - # FIXME: better would be a hash of the co_code - return "%s_0x%x" % (basename, id(co_code)) - - -def disco_loop_asm_format(opc, version, co, real_out, fn_name_map, all_fns): - """Produces disassembly in a format more conducive to - automatic assembly by producing inner modules before they are - used by outer ones. Since this is recusive, we'll - use more stack space at runtime. - """ - - co = codeType2Portable(co) - co_name = co.co_name - mapped_name = fn_name_map.get(co_name, co_name) - - new_consts = [] - for c in co.co_consts: - if iscode(c): - if isinstance(c, types.CodeType): - c_compat = codeType2Portable(c) - else: - c_compat = c - - disco_loop_asm_format( - opc, version, c_compat, real_out, fn_name_map, all_fns - ) - - m = re.match(".* object <(.+)> at", str(c)) - if m: - basename = m.group(1) - if basename != "module": - mapped_name = code_uniquify(basename, c.co_code) - c_compat.co_name = mapped_name - c_compat.freeze() - new_consts.append(c_compat) - else: - new_consts.append(c) - pass - co.co_consts = new_consts - - m = re.match("^<(.+)>$", co.co_name) - if m or co_name in all_fns: - if co_name in all_fns: - basename = co_name - else: - basename = m.group(1) - if basename != "module": - mapped_name = code_uniquify(basename, co.co_code) - co_name = mapped_name - assert mapped_name not in fn_name_map - fn_name_map[mapped_name] = basename - co.co_name = mapped_name - pass - elif co_name in fn_name_map: - # FIXME: better would be a hash of the co_code - mapped_name = 
code_uniquify(co_name, co.co_code) - fn_name_map[mapped_name] = co_name - co.co_name = mapped_name - pass - - co = co.freeze() - all_fns.add(co_name) - if co.co_name != "" or co.co_filename: - real_out.write("\n" + format_code_info(co, version, mapped_name) + "\n") - - bytecode = Bytecode(co, opc, dup_lines=True) - real_out.write(bytecode.dis(asm_format=True) + "\n") - - -def disassemble_file( - filename, outstream=sys.stdout, asm_format=False, header=False, show_bytes=False -): - """ - disassemble Python byte-code file (.pyc) - - If given a Python source file (".py") file, we'll - try to find the corresponding compiled object. - - If that fails we'll compile internally for the Python version currently running - """ - pyc_filename = None - try: - # FIXME: add whether we want PyPy - pyc_filename = check_object_path(filename) - version, timestamp, magic_int, co, is_pypy, source_size, sip_hash = load_module(pyc_filename) - except: - - # Hack alert: we're using pyc_filename set as a proxy for whether the filename exists. - # check_object_path() will succeed if the file exists. 
- if pyc_filename is None: - raise - stat = os.stat(filename) - source = open(filename, "r").read() - co = compile(source, filename, "exec") - is_pypy = IS_PYPY - magic_int = PYTHON_MAGIC_INT - sip_hash = 0 - source_size = stat.st_size - timestamp = stat.st_mtime - version = PYTHON_VERSION - else: - filename = pyc_filename - - if header: - show_module_header( - version, - co, - timestamp, - outstream, - is_pypy, - magic_int, - source_size, - sip_hash, - show_filename=True, - ) - - else: - disco( - version, - co, - timestamp, - outstream, - is_pypy, - magic_int, - source_size, - sip_hash, - asm_format=asm_format, - show_bytes=show_bytes, - ) - # print co.co_filename - return filename, co, version, timestamp, magic_int, is_pypy, source_size, sip_hash - - -def _test(): - """Simple test program to disassemble a file.""" - argc = len(sys.argv) - if argc != 2: - if argc == 1 and xdis.PYTHON3: - fn = __file__ - else: - sys.stderr.write("usage: %s [-|CPython compiled file]\n" % __file__) - sys.exit(2) - else: - fn = sys.argv[1] - disassemble_file(fn) - - -if __name__ == "__main__": - _test() diff --git a/xdis/std.py b/xdis/std.py index ea10c575..fecdc944 100644 --- a/xdis/std.py +++ b/xdis/std.py @@ -51,7 +51,7 @@ from xdis import IS_PYPY from xdis.bytecode import Bytecode as _Bytecode from xdis.instruction import _Instruction -from xdis.main import disco as _disco +from xdis.disasm import disco as _disco from xdis.op_imports import get_opcode_module from xdis.cross_dis import code_info as _code_info, pretty_flags as _pretty_flags, show_code as _show_code, xstack_effect as _stack_effect diff --git a/xdis/version.py b/xdis/version.py index 7821c77b..6efce069 100644 --- a/xdis/version.py +++ b/xdis/version.py @@ -1,3 +1,3 @@ # This file is suitable for sourcing inside POSIX shell as # well as importing into Python -VERSION="4.7.0" # noqa +VERSION="5.0.0" # noqa