From 5d454123b62688ade80efb65dbb27b792ace3736 Mon Sep 17 00:00:00 2001 From: Yee Hing Tong Date: Sun, 8 Sep 2024 19:04:01 -0700 Subject: [PATCH] add enum to ls_files to simplify Signed-off-by: Yee Hing Tong --- flytekit/image_spec/default_builder.py | 8 +------- flytekit/image_spec/image_spec.py | 10 ++-------- flytekit/tools/fast_registration.py | 10 +--------- flytekit/tools/script_mode.py | 15 +++++++-------- .../flytekitplugins/envd/image_builder.py | 9 +-------- .../flytekit/unit/cli/pyflyte/test_script_mode.py | 9 +++++---- 6 files changed, 17 insertions(+), 44 deletions(-) diff --git a/flytekit/image_spec/default_builder.py b/flytekit/image_spec/default_builder.py index 65d60665df..af7a68a1bf 100644 --- a/flytekit/image_spec/default_builder.py +++ b/flytekit/image_spec/default_builder.py @@ -177,13 +177,7 @@ def create_docker_context(image_spec: ImageSpec, tmp_dir: Path): # what about deref_symlink? ignore = IgnoreGroup(image_spec.source_root, [GitIgnore, DockerIgnore, StandardIgnore]) - if image_spec.copy == CopyFileDetection.LOADED_MODULES: - # This is the 'auto' semantic by default used for pyflyte run, it only copies loaded .py files. - sys_modules = list(sys.modules.values()) - ls, _ = ls_files(str(image_spec.source_root), sys_modules, deref_symlinks=False, ignore_group=ignore) - else: - # This triggers listing of all files - ls, _ = ls_files(str(image_spec.source_root), [], deref_symlinks=False, ignore_group=ignore) + ls, _ = ls_files(str(image_spec.source_root), image_spec.copy, deref_symlinks=False, ignore_group=ignore) for file_to_copy in ls: rel_path = os.path.relpath(file_to_copy, start=str(image_spec.source_root)) diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index 2fa0269bfd..5696a1800a 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -5,7 +5,6 @@ import os import pathlib import re -import sys import typing from abc import abstractmethod from dataclasses import asdict, dataclass @@ -164,13 +163,8 @@ def tag(self) -> str: # todo: we should pipe through ignores from the command line here at some point. # what about deref_symlink? ignore = IgnoreGroup(self.source_root, [GitIgnore, DockerIgnore, StandardIgnore]) - if self.copy == CopyFileDetection.LOADED_MODULES: - # This is the 'auto' semantic by default used for pyflyte run, it only copies loaded .py files. - sys_modules = list(sys.modules.values()) - _, ls_digest = ls_files(str(self.source_root), sys_modules, deref_symlinks=False, ignore_group=ignore) - else: - # This triggers listing of all files, mimicking the old way of creating the tar file. - _, ls_digest = ls_files(str(self.source_root), [], deref_symlinks=False, ignore_group=ignore) + + _, ls_digest = ls_files(str(self.source_root), self.copy, deref_symlinks=False, ignore_group=ignore) # Since the source root is supposed to represent the files, store the digest into the source root as a # shortcut to represent all the files. diff --git a/flytekit/tools/fast_registration.py b/flytekit/tools/fast_registration.py index 3ffea6691f..e6c7d3399c 100644 --- a/flytekit/tools/fast_registration.py +++ b/flytekit/tools/fast_registration.py @@ -6,7 +6,6 @@ import pathlib import posixpath import subprocess -import sys import tarfile import tempfile import typing @@ -97,14 +96,7 @@ def fast_package( if options and ( options.copy_style == CopyFileDetection.LOADED_MODULES or options.copy_style == CopyFileDetection.ALL ): - if options.copy_style == CopyFileDetection.LOADED_MODULES: - # This is the 'auto' semantic by default used for pyflyte run, it only copies loaded .py files. - sys_modules = list(sys.modules.values()) - ls, ls_digest = ls_files(str(source), sys_modules, deref_symlinks, ignore) - else: - # This triggers listing of all files, mimicking the old way of creating the tar file. - ls, ls_digest = ls_files(str(source), [], deref_symlinks, ignore) - + ls, ls_digest = ls_files(str(source), options.copy_style, deref_symlinks, ignore) logger.debug(f"Hash digest: {ls_digest}", fg="green") if options.show_files: diff --git a/flytekit/tools/script_mode.py b/flytekit/tools/script_mode.py index adbcd313f4..2a2ef84aa4 100644 --- a/flytekit/tools/script_mode.py +++ b/flytekit/tools/script_mode.py @@ -13,6 +13,7 @@ from types import ModuleType from typing import List, Optional, Tuple, Union +from flytekit.constants import CopyFileDetection from flytekit.loggers import logger from flytekit.tools.ignore import IgnoreGroup @@ -86,7 +87,7 @@ def tar_strip_file_attributes(tar_info: tarfile.TarInfo) -> tarfile.TarInfo: def ls_files( source_path: str, - modules: List[ModuleType], + copy_file_detection: CopyFileDetection, deref_symlinks: bool = False, ignore_group: Optional[IgnoreGroup] = None, ) -> Tuple[List[str], str]: @@ -101,19 +102,17 @@ def ls_files( Then the common root is just the folder a/. The modules list is filtered against this root. Only files representing modules under this root are included - - If the modules list should be a list of all the - - needs to compute digest as well. + If the copy enum is set to loaded_modules, then the loaded sys modules will be used. """ # Unlike the below, the value error here is useful and should be returned to the user, like if absolute and # relative paths are mixed. # This is --copy auto - if modules: - all_files = list_imported_modules_as_files(source_path, modules) - # this is --copy all + if copy_file_detection == CopyFileDetection.LOADED_MODULES: + sys_modules = list(sys.modules.values()) + all_files = list_imported_modules_as_files(source_path, sys_modules) + # this is --copy all (--copy none should never invoke this function) else: all_files = list_all_files(source_path, deref_symlinks, ignore_group) diff --git a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py index ac383acea7..8fdaa2b2a1 100644 --- a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py +++ b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py @@ -2,7 +2,6 @@ import pathlib import shutil import subprocess -import sys from dataclasses import asdict from importlib import metadata @@ -161,13 +160,7 @@ def build(): dst = pathlib.Path(cfg_path).parent - if image_spec.copy == CopyFileDetection.LOADED_MODULES: - # This is the 'auto' semantic by default used for pyflyte run, it only copies loaded .py files. - sys_modules = list(sys.modules.values()) - ls, _ = ls_files(str(image_spec.source_root), sys_modules, deref_symlinks=False, ignore_group=ignore) - else: - # This triggers listing of all files - ls, _ = ls_files(str(image_spec.source_root), [], deref_symlinks=False, ignore_group=ignore) + ls, _ = ls_files(str(image_spec.source_root), image_spec.copy, deref_symlinks=False, ignore_group=ignore) for file_to_copy in ls: rel_path = os.path.relpath(file_to_copy, start=str(image_spec.source_root)) diff --git a/tests/flytekit/unit/cli/pyflyte/test_script_mode.py b/tests/flytekit/unit/cli/pyflyte/test_script_mode.py index dcccda0cd2..74d8aeab73 100644 --- a/tests/flytekit/unit/cli/pyflyte/test_script_mode.py +++ b/tests/flytekit/unit/cli/pyflyte/test_script_mode.py @@ -4,7 +4,7 @@ import tempfile from flytekit.tools.script_mode import ls_files - +from flytekit.constants import CopyFileDetection # a pytest fixture that creates a tmp directory and creates # a small file structure in it @@ -36,15 +36,16 @@ def dummy_dir_structure(): def test_list_dir(dummy_dir_structure): - files, d = ls_files(str(dummy_dir_structure), []) + files, d = ls_files(str(dummy_dir_structure), CopyFileDetection.ALL) assert len(files) == 5 if os.name != "nt": assert d == "c092f1b85f7c6b2a71881a946c00a855" def test_list_filtered_on_modules(dummy_dir_structure): - import sys # any module will do - files, d = ls_files(str(dummy_dir_structure), [sys]) + # any module will do + import sys # noqa + files, d = ls_files(str(dummy_dir_structure), CopyFileDetection.LOADED_MODULES) # because none of the files are python modules, nothing should be returned assert len(files) == 0 if os.name != "nt":