Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor bcjfilter to centralize logic and simplify consumers #189

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions libxz/SConscript
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
Import('env')

from staticx.bcjfilter import get_bcj_filter_arch
from staticx.bcjfilter import BCJFilter

# Enable the BCJ filter appropriate for this platform
bcj_filter_arch = get_bcj_filter_arch()
if bcj_filter_arch:
xz_dec_macro = 'XZ_DEC_' + bcj_filter_arch
env.Append(CPPDEFINES = {xz_dec_macro: 1})
bcj_filter = BCJFilter.for_current_arch()
if bcj_filter:
env.Append(CPPDEFINES = {bcj_filter.xz_dec_macro: 1})

libxz = env.StaticLibrary(
target = 'xz',
Expand Down
25 changes: 8 additions & 17 deletions staticx/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,23 @@
import lzma
from os.path import basename

from .bcjfilter import get_bcj_filter_arch
from .bcjfilter import BCJFilter
from .utils import get_symlink_target, make_mode_executable
from .constants import *
from .errors import *


def get_bcj_filter():
arch = get_bcj_filter_arch()
if not arch:
return None, ''

# Get the lzma module constant name and value
filt_name = 'FILTER_' + arch
filt = getattr(lzma, filt_name)

return filt, filt_name


def get_xz_filters():
"""Get lzma XZ filter chain

See https://docs.python.org/3/library/lzma.html#filter-chain-specs
"""
filters = []

# Get a BCJ filter for the current architecture
bcj_filter, bcj_filter_name = get_bcj_filter()
bcj_filter = BCJFilter.for_current_arch()
if bcj_filter:
logging.info("Using XZ BCJ filter {}".format(bcj_filter_name))
filters.append(dict(id=bcj_filter))
logging.info("Using XZ BCJ filter {}".format(bcj_filter))
filters.append(dict(id=bcj_filter.lzma_filter_id))

# The last filter in the chain must be a compression filter.
filters.append(dict(id=lzma.FILTER_LZMA2))
Expand Down
77 changes: 52 additions & 25 deletions staticx/bcjfilter.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,65 @@
import platform
import lzma

# NOTE: This is also used by libxz/SConscript

def get_bcj_filter_arch():
"""
Get an appropriate BCJ filter for the current architecture.
class BCJFilter:
__slots__ = ['arch']

Returns just the architecture part of the BCJ filter name.
This can be prepended with FILTER_ for a Python lzma module constant,
or XZ_DEC_ for an XZ Embedded decoder macro.
"""
# It's possible that the code being compressed might not match that of the
# current Python interpreter. In that case the problem becomes much more
# complicated, as we have to inspect the files to make a determination.
# This approach should be "good enough"; the worst case scenario is only a
# slightly worse compression ratio.
def __init__(self, arch):
self.arch = arch

machine = platform.machine()
def __repr__(self):
return 'BCJFilter({})'.format(self.arch)

if machine in ('i386', 'i686', 'x86_64'):
return 'X86'
def __str__(self):
return self.arch

if machine == 'ia64':
return 'IA64'
@property
def xz_dec_macro(self):
"""Get the XZ Embedded decoder macro name for this filter"""
return 'XZ_DEC_' + self.arch

if machine.startswith('arm'): # arm, armv8b, etc.
return 'ARM'
@property
def lzma_filter_id(self):
"""Get a Python lzma module filter id for this filter

# TODO: 'ARMTHUMB'
See https://docs.python.org/3/library/lzma.html#filter-chain-specs
"""
return getattr(lzma, 'FILTER_' + self.arch)

if machine.startswith('ppc'):
return 'POWERPC'
@classmethod
def for_arch(cls, arch):
"""
Get an appropriate BCJ filter for the given architecture.
"""
if arch in ('i386', 'i686', 'x86_64'):
return cls('X86')

if machine.startswith('sparc'):
return 'SPARC'
if arch == 'ia64':
return cls('IA64')

return None
if arch.startswith('arm'): # arm, armv8b, etc.
return cls('ARM')

# TODO: 'ARMTHUMB'

if arch.startswith('ppc'):
return cls('POWERPC')

if arch.startswith('sparc'):
return cls('SPARC')

return None

@classmethod
def for_current_arch(cls):
"""
Get an appropriate BCJ filter for the current architecture.
"""
# It's possible that the code being compressed might not match that of the
# current Python interpreter. In that case the problem becomes much more
# complicated, as we have to inspect the files to make a determination.
# This approach should be "good enough"; the worst case scenario is only a
# slightly worse compression ratio.
return cls.for_arch(platform.machine())