JonathonReinhart · JonathonReinhart · Oct 3, 2021
diff --git a/libxz/SConscript b/libxz/SConscript
@@ -1,12 +1,11 @@
 Import('env')
 
-from staticx.bcjfilter import get_bcj_filter_arch
+from staticx.bcjfilter import BCJFilter
 
 # Enable the BCJ filter appropriate for this platform
-bcj_filter_arch = get_bcj_filter_arch()
-if bcj_filter_arch:
-    xz_dec_macro = 'XZ_DEC_' + bcj_filter_arch
-    env.Append(CPPDEFINES = {xz_dec_macro: 1})
+bcj_filter = BCJFilter.for_current_arch()  # None when no BCJ filter matches this machine
+if bcj_filter:
+    env.Append(CPPDEFINES = {bcj_filter.xz_dec_macro: 1})  # defines XZ_DEC_<arch>=1
 
 libxz = env.StaticLibrary(
     target = 'xz',

diff --git a/staticx/archive.py b/staticx/archive.py
@@ -3,32 +3,23 @@
 import lzma
 from os.path import basename
 
-from .bcjfilter import get_bcj_filter_arch
+from .bcjfilter import BCJFilter
 from .utils import get_symlink_target, make_mode_executable
 from .constants import *
 from .errors import *
 
-
-def get_bcj_filter():
-    arch = get_bcj_filter_arch()
-    if not arch:
-        return None, ''
-
-    # Get the lzma module constant name and value
-    filt_name = 'FILTER_' + arch
-    filt = getattr(lzma, filt_name)
-
-    return filt, filt_name
-
-
 def get_xz_filters():
+    """Get lzma XZ filter chain
+
+    See https://docs.python.org/3/library/lzma.html#filter-chain-specs
+    """
     filters = []
 
     # Get a BCJ filter for the current architecture
-    bcj_filter, bcj_filter_name = get_bcj_filter()
+    bcj_filter = BCJFilter.for_current_arch()
     if bcj_filter:
-        logging.info("Using XZ BCJ filter {}".format(bcj_filter_name))
-        filters.append(dict(id=bcj_filter))
+        logging.info("Using XZ BCJ filter {}".format(bcj_filter))
+        filters.append(dict(id=bcj_filter.lzma_filter_id))
 
     # The last filter in the chain must be a compression filter.
     filters.append(dict(id=lzma.FILTER_LZMA2))

diff --git a/staticx/bcjfilter.py b/staticx/bcjfilter.py
@@ -1,38 +1,65 @@
 import platform
+import lzma
 
 # NOTE: This is also used by libxz/SConscript
 
-def get_bcj_filter_arch():
-    """
-    Get an appropriate BCJ filter for the current architecture.
+class BCJFilter:
+    __slots__ = ['arch']  # arch: filter name suffix such as 'X86' or 'ARM'
 
-    Returns just the architecture part of the BCJ filter name.
-    This can be prepended with FILTER_ for a Python lzma module constant,
-    or XZ_DEC_ for an XZ Embedded decoder macro.
-    """
-    # It's possible that the code being compressed might not match that of the
-    # current Python interpreter. In that case the problem becomes much more
-    # complicated, as we have to inspect the files to make a determination.
-    # This approach should be "good enough"; the worst case scenario is only a
-    # slightly worse compression ratio.
+    def __init__(self, arch):
+        self.arch = arch
 
-    machine = platform.machine()
+    def __repr__(self):
+        return 'BCJFilter({})'.format(self.arch)
 
-    if machine in ('i386', 'i686', 'x86_64'):
-        return 'X86'
+    def __str__(self):  # bare filter name such as 'X86'
+        return self.arch
 
-    if machine == 'ia64':
-        return 'IA64'
+    @property
+    def xz_dec_macro(self):
+        """Return the XZ Embedded decoder macro name, 'XZ_DEC_' + arch"""
+        return 'XZ_DEC_' + self.arch
 
-    if machine.startswith('arm'):   # arm, armv8b, etc.
-        return 'ARM'
+    @property
+    def lzma_filter_id(self):
+        """Return the Python lzma module constant FILTER_<arch> for this filter
 
-    # TODO: 'ARMTHUMB'
+        See https://docs.python.org/3/library/lzma.html#filter-chain-specs
+        """
+        return getattr(lzma, 'FILTER_' + self.arch)
 
-    if machine.startswith('ppc'):
-        return 'POWERPC'
+    @classmethod
+    def for_arch(cls, arch):
+        """
+        Get the BCJ filter for machine name `arch`, or None if none matches.
+        """
+        if arch in ('i386', 'i686', 'x86_64'):
+            return cls('X86')
 
-    if machine.startswith('sparc'):
-        return 'SPARC'
+        if arch == 'ia64':
+            return cls('IA64')
 
-    return None
+        if arch.startswith('arm'):   # arm, armv8b, etc.
+            return cls('ARM')
+
+        # TODO: 'ARMTHUMB'
+
+        if arch.startswith('ppc'):  # NOTE(review): matches ppc64le too; xz's POWERPC BCJ is big-endian only - confirm
+            return cls('POWERPC')
+
+        if arch.startswith('sparc'):
+            return cls('SPARC')
+
+        return None
+
+    @classmethod
+    def for_current_arch(cls):
+        """
+        Get the BCJ filter for platform.machine(), or None if none matches.
+        """
+        # It's possible that the code being compressed might not match that of the
+        # current Python interpreter. In that case the problem becomes much more
+        # complicated, as we have to inspect the files to make a determination.
+        # This approach should be "good enough"; the worst case scenario is only a
+        # slightly worse compression ratio.
+        return cls.for_arch(platform.machine())