From 87379a49ab5fa5d24dd327d5087cad09511c42b0 Mon Sep 17 00:00:00 2001 From: Jonathon Reinhart Date: Sun, 3 Oct 2021 00:25:08 -0400 Subject: [PATCH] Refactor bcjfilter to centralize logic and simplify consumers --- libxz/SConscript | 9 +++--- staticx/archive.py | 25 +++++--------- staticx/bcjfilter.py | 77 ++++++++++++++++++++++++++++++-------------- 3 files changed, 64 insertions(+), 47 deletions(-) diff --git a/libxz/SConscript b/libxz/SConscript index 08849b5..84abc4c 100644 --- a/libxz/SConscript +++ b/libxz/SConscript @@ -1,12 +1,11 @@ Import('env') -from staticx.bcjfilter import get_bcj_filter_arch +from staticx.bcjfilter import BCJFilter # Enable the BCJ filter appropriate for this platform -bcj_filter_arch = get_bcj_filter_arch() -if bcj_filter_arch: - xz_dec_macro = 'XZ_DEC_' + bcj_filter_arch - env.Append(CPPDEFINES = {xz_dec_macro: 1}) +bcj_filter = BCJFilter.for_current_arch() +if bcj_filter: + env.Append(CPPDEFINES = {bcj_filter.xz_dec_macro: 1}) libxz = env.StaticLibrary( target = 'xz', diff --git a/staticx/archive.py b/staticx/archive.py index c5c14de..83cb3ff 100644 --- a/staticx/archive.py +++ b/staticx/archive.py @@ -3,32 +3,23 @@ import lzma from os.path import basename -from .bcjfilter import get_bcj_filter_arch +from .bcjfilter import BCJFilter from .utils import get_symlink_target, make_mode_executable from .constants import * from .errors import * - -def get_bcj_filter(): - arch = get_bcj_filter_arch() - if not arch: - return None, '' - - # Get the lzma module constant name and value - filt_name = 'FILTER_' + arch - filt = getattr(lzma, filt_name) - - return filt, filt_name - - def get_xz_filters(): + """Get lzma XZ filter chain + + See https://docs.python.org/3/library/lzma.html#filter-chain-specs + """ filters = [] # Get a BCJ filter for the current architecture - bcj_filter, bcj_filter_name = get_bcj_filter() + bcj_filter = BCJFilter.for_current_arch() if bcj_filter: - logging.info("Using XZ BCJ filter {}".format(bcj_filter_name)) - filters.append(dict(id=bcj_filter)) + logging.info("Using XZ BCJ filter {}".format(bcj_filter)) + filters.append(dict(id=bcj_filter.lzma_filter_id)) # The last filter in the chain must be a compression filter. filters.append(dict(id=lzma.FILTER_LZMA2)) diff --git a/staticx/bcjfilter.py b/staticx/bcjfilter.py index 52925de..477e720 100644 --- a/staticx/bcjfilter.py +++ b/staticx/bcjfilter.py @@ -1,38 +1,65 @@ import platform +import lzma # NOTE: This is also used by libxz/SConscript -def get_bcj_filter_arch(): - """ - Get an appropriate BCJ filter for the current architecture. +class BCJFilter: + __slots__ = ['arch'] - Returns just the architecture part of the BCJ filter name. - This can be prepended with FILTER_ for a Python lzma module constant, - or XZ_DEC_ for an XZ Embedded decoder macro. - """ - # It's possible that the code being compressed might not match that of the - # current Python interpreter. In that case the problem becomes much more - # complicated, as we have to inspect the files to make a determination. - # This approach should be "good enough"; the worst case scenario is only a - # slightly worse compression ratio. + def __init__(self, arch): + self.arch = arch - machine = platform.machine() + def __repr__(self): + return 'BCJFilter({})'.format(self.arch) - if machine in ('i386', 'i686', 'x86_64'): - return 'X86' + def __str__(self): + return self.arch - if machine == 'ia64': - return 'IA64' + @property + def xz_dec_macro(self): + """Get the XZ Embedded decoder macro name for this filter""" + return 'XZ_DEC_' + self.arch - if machine.startswith('arm'): # arm, armv8b, etc. - return 'ARM' + @property + def lzma_filter_id(self): + """Get a Python lzma module filter id for this filter - # TODO: 'ARMTHUMB' + See https://docs.python.org/3/library/lzma.html#filter-chain-specs + """ + return getattr(lzma, 'FILTER_' + self.arch) - if machine.startswith('ppc'): - return 'POWERPC' + @classmethod + def for_arch(cls, arch): + """ + Get an appropriate BCJ filter for the given architecture. + """ + if arch in ('i386', 'i686', 'x86_64'): + return cls('X86') - if machine.startswith('sparc'): - return 'SPARC' + if arch == 'ia64': + return cls('IA64') - return None + if arch.startswith('arm'): # arm, armv8b, etc. + return cls('ARM') + + # TODO: 'ARMTHUMB' + + if arch.startswith('ppc'): + return cls('POWERPC') + + if arch.startswith('sparc'): + return cls('SPARC') + + return None + + @classmethod + def for_current_arch(cls): + """ + Get an appropriate BCJ filter for the current architecture. + """ + # It's possible that the code being compressed might not match that of the + # current Python interpreter. In that case the problem becomes much more + # complicated, as we have to inspect the files to make a determination. + # This approach should be "good enough"; the worst case scenario is only a + # slightly worse compression ratio. + return cls.for_arch(platform.machine())