From 0befd3aab0ce6ddb2f694c0fabfac8b299056388 Mon Sep 17 00:00:00 2001 From: Paul Mathon Date: Fri, 13 Sep 2024 09:55:41 +0000 Subject: [PATCH 1/2] Allow large PEX while submitting skein app --- cluster_pack/skein/skein_config_builder.py | 5 +++-- cluster_pack/skein/skein_launcher.py | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cluster_pack/skein/skein_config_builder.py b/cluster_pack/skein/skein_config_builder.py index 168866b..ae5552d 100644 --- a/cluster_pack/skein/skein_config_builder.py +++ b/cluster_pack/skein/skein_config_builder.py @@ -66,7 +66,8 @@ def build( package_path: Optional[str] = None, additional_files: Optional[List[str]] = None, tmp_dir: str = packaging._get_tmp_dir(), - process_logs: Callable[[str], Any] = None + process_logs: Callable[[str], Any] = None, + allow_large_pex: bool = True ) -> SkeinConfig: """Build the skein config for a module to execute @@ -81,7 +82,7 @@ def build( :return: SkeinConfig """ if not package_path: - package_path, _ = uploader.upload_env() + package_path, _ = uploader.upload_env(allow_large_pex=allow_large_pex) python_env_descriptor = packaging.get_pyenv_usage_from_archive(package_path) diff --git a/cluster_pack/skein/skein_launcher.py b/cluster_pack/skein/skein_launcher.py index 6348f3f..bedc1fa 100644 --- a/cluster_pack/skein/skein_launcher.py +++ b/cluster_pack/skein/skein_launcher.py @@ -25,7 +25,8 @@ def submit(skein_client: skein.Client, acquire_map_reduce_delegation_token: bool = False, pre_script_hook: Optional[str] = None, max_attempts: int = 1, max_restarts: int = 0, - process_logs: Callable[[str], Any] = None) -> str: + process_logs: Callable[[str], Any] = None, + allow_large_pex: bool = True) -> str: """Execute a python module in a skein container :param skein_client: skein.Client to use @@ -59,7 +60,8 @@ def submit(skein_client: skein.Client, package_path=package_path, additional_files=additional_files, tmp_dir=tmp_dir, - process_logs=process_logs) + process_logs=process_logs, 
+ allow_large_pex=True) return _submit( skein_client, skein_config, From e8d86d6dfa98a339235a43484eb196a703c68dce Mon Sep 17 00:00:00 2001 From: Paul Mathon Date: Fri, 13 Sep 2024 10:33:30 +0000 Subject: [PATCH 2/2] Set default allow_large_pex to False --- cluster_pack/skein/skein_config_builder.py | 5 ++++- cluster_pack/skein/skein_launcher.py | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cluster_pack/skein/skein_config_builder.py b/cluster_pack/skein/skein_config_builder.py index ae5552d..5ed7c7d 100644 --- a/cluster_pack/skein/skein_config_builder.py +++ b/cluster_pack/skein/skein_config_builder.py @@ -67,7 +67,7 @@ def build( additional_files: Optional[List[str]] = None, tmp_dir: str = packaging._get_tmp_dir(), process_logs: Callable[[str], Any] = None, - allow_large_pex: bool = True + allow_large_pex: bool = False ) -> SkeinConfig: """Build the skein config for a module to execute @@ -79,6 +79,9 @@ def build( :param tmp_dir: a temp dir for local files :param process_logs: hook with the local log path as a parameter, can be used to uplaod the logs somewhere + :param allow_large_pex: Creates a non-executable pex that will need to be unzipped to circumvent + Python's limitation with zips > 2GB. 
The file will need to be unzipped + and the entry point will be /__main__.py :return: SkeinConfig """ if not package_path: diff --git a/cluster_pack/skein/skein_launcher.py b/cluster_pack/skein/skein_launcher.py index bedc1fa..b198734 100644 --- a/cluster_pack/skein/skein_launcher.py +++ b/cluster_pack/skein/skein_launcher.py @@ -26,7 +26,7 @@ def submit(skein_client: skein.Client, pre_script_hook: Optional[str] = None, max_attempts: int = 1, max_restarts: int = 0, process_logs: Callable[[str], Any] = None, - allow_large_pex: bool = True) -> str: + allow_large_pex: bool = False) -> str: """Execute a python module in a skein container :param skein_client: skein.Client to use @@ -51,6 +51,9 @@ def submit(skein_client: skein.Client, :param max_restarts: maximum number of restarts allowed for the service :param process_logs: hook with the local log path as a parameter, can be used to uplaod the logs somewhere + :param allow_large_pex: Creates a non-executable pex that will need to be unzipped to circumvent + Python's limitation with zips > 2GB. The file will need to be unzipped + and the entry point will be /__main__.py :return: SkeinConfig """ with tempfile.TemporaryDirectory() as tmp_dir: @@ -61,7 +64,7 @@ def submit(skein_client: skein.Client, additional_files=additional_files, tmp_dir=tmp_dir, process_logs=process_logs, - allow_large_pex=True) + allow_large_pex=allow_large_pex) return _submit( skein_client, skein_config,