From 37e5e57e3aa717c45f2a8de2f7ca267147c9b425 Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Wed, 12 Feb 2025 15:59:08 +0000 Subject: [PATCH 1/3] feat(cache): get the list of cached models This adds a method to get the list of cached models for the current optimum version as a set of (model_arch, model_org, model_name). --- optimum/neuron/utils/__init__.py | 16 +++++++-- optimum/neuron/utils/hub_cache_utils.py | 44 +++++++++++++++++++++++++ tests/cache/test_neuronx_cache.py | 5 ++- 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/optimum/neuron/utils/__init__.py b/optimum/neuron/utils/__init__.py index 5f8b7fca0..1a4179f26 100644 --- a/optimum/neuron/utils/__init__.py +++ b/optimum/neuron/utils/__init__.py @@ -32,7 +32,13 @@ "ENCODER_NAME", "NEURON_FILE_NAME", ], - "hub_cache_utils": ["ModelCacheEntry", "get_hub_cached_entries", "hub_neuronx_cache", "synchronize_hub_cache"], + "hub_cache_utils": [ + "ModelCacheEntry", + "get_hub_cached_entries", + "get_hub_cached_models", + "hub_neuronx_cache", + "synchronize_hub_cache", + ], "import_utils": [ "is_accelerate_available", "is_neuron_available", @@ -94,7 +100,13 @@ ENCODER_NAME, NEURON_FILE_NAME, ) - from .hub_cache_utils import ModelCacheEntry, get_hub_cached_entries, hub_neuronx_cache, synchronize_hub_cache + from .hub_cache_utils import ( + ModelCacheEntry, + get_hub_cached_entries, + get_hub_cached_models, + hub_neuronx_cache, + synchronize_hub_cache, + ) from .import_utils import ( is_accelerate_available, is_neuron_available, diff --git a/optimum/neuron/utils/hub_cache_utils.py b/optimum/neuron/utils/hub_cache_utils.py index ad2add627..9f8c21bbe 100644 --- a/optimum/neuron/utils/hub_cache_utils.py +++ b/optimum/neuron/utils/hub_cache_utils.py @@ -25,6 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union from huggingface_hub import HfApi, get_token +from huggingface_hub.errors import EntryNotFoundError from huggingface_hub.hf_api import RepoFile from transformers import 
AutoConfig, PretrainedConfig @@ -33,6 +34,7 @@ from .import_utils import is_neuronx_available from .patching import patch_everywhere from .require_utils import requires_torch_neuronx +from .version_utils import get_neuronxcc_version if is_neuronx_available(): @@ -447,6 +449,48 @@ def get_hub_cached_entries( return model_entries +def get_hub_cached_models( + mode: Union[Literal["training"], Literal["inference"], Mode], cache_repo_id: Optional[str] = None +): + """Get the list of cached models for the specified mode for the current version + + Args: + mode (`Union[Literal["training"], Literal["inference"], Mode]`): the cache mode (inference or training). + cache_repo_id (`Optional[str]`): the path to a cache repo id if different from the default one. + Returns: + A set of (model_arch, model_org, model_name) + """ + if cache_repo_id is None: + cache_repo_id = get_hf_hub_cache_repo() + registry_folder = get_registry_folder_for_mode(mode) + api = HfApi() + root = api.list_repo_tree(cache_repo_id, path_in_repo="", recursive=False) + for root_file in root: + compiler_pattern = "neuronxcc-" + if is_neuronx_available(): + # If we know the current compiler we can avoid going through all of them in the hub cache + compiler_pattern += get_neuronxcc_version() + if root_file.path.startswith(compiler_pattern): + # Look for a registry of cached models for the current optimum-version + path_in_repo = root_file.path + "/" + registry_folder + root_sub_paths = path_in_repo.split("/") + try: + registry = api.list_repo_tree(cache_repo_id, path_in_repo=path_in_repo, recursive=True) + cached_models = set({}) + for registry_file in registry: + # Extract each cached model as a tuple of (arch, org, model) + if registry_file.path.endswith(".json"): + sub_paths = registry_file.path.split("/") + if len(sub_paths) == len(root_sub_paths) + 4: + model_arch, model_org, model_name = sub_paths[-4:-1] + cached_models.add((model_arch, model_org, model_name)) + return cached_models + except 
EntryNotFoundError: + # No cached models for the current version + continue + return set({}) + + def _prepare_config_for_matching(entry_config: Dict, target_entry: ModelCacheEntry, model_type: str): if model_type == "stable-diffusion": # Remove neuron config for comparison as the target does not have it diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py index 8066dc8cc..d71d065d2 100644 --- a/tests/cache/test_neuronx_cache.py +++ b/tests/cache/test_neuronx_cache.py @@ -32,7 +32,7 @@ NeuronStableDiffusionPipeline, NeuronStableDiffusionXLPipeline, ) -from optimum.neuron.utils import get_hub_cached_entries, synchronize_hub_cache +from optimum.neuron.utils import get_hub_cached_entries, get_hub_cached_models, synchronize_hub_cache from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx @@ -198,6 +198,9 @@ def test_decoder_cache(cache_repos): model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id) assert len(model_entries) == 1 assert model_entries[0] == model.config.neuron + # Also verify that the model appears in the list of cached models + cached_models = get_hub_cached_models("inference") + assert ("gpt2", "hf-internal-testing", "tiny-random-gpt2") in cached_models # Clear the local cache for root, dirs, files in os.walk(cache_path): for f in files: From 991506efec86e4c1e1ed3597323586712bd62eba Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Mon, 17 Feb 2025 10:28:22 +0100 Subject: [PATCH 2/3] review: Apply suggestions Co-authored-by: Michael Benayoun --- optimum/neuron/utils/hub_cache_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/neuron/utils/hub_cache_utils.py b/optimum/neuron/utils/hub_cache_utils.py index 9f8c21bbe..442e2abff 100644 --- a/optimum/neuron/utils/hub_cache_utils.py +++ b/optimum/neuron/utils/hub_cache_utils.py @@ -476,7 +476,7 @@ def get_hub_cached_models( root_sub_paths = path_in_repo.split("/") try: registry 
= api.list_repo_tree(cache_repo_id, path_in_repo=path_in_repo, recursive=True) - cached_models = set({}) + cached_models = set() for registry_file in registry: # Extract each cached model as a tuple of (arch, org, model) if registry_file.path.endswith(".json"): @@ -488,7 +488,7 @@ def get_hub_cached_models( except EntryNotFoundError: # No cached models for the current version continue - return set({}) + return set() def _prepare_config_for_matching(entry_config: Dict, target_entry: ModelCacheEntry, model_type: str): From 356f2704e813ebc76bc9a7473d51071dc5b7d9f9 Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Mon, 17 Feb 2025 10:31:47 +0100 Subject: [PATCH 3/3] review: explain file pattern --- optimum/neuron/utils/hub_cache_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/neuron/utils/hub_cache_utils.py b/optimum/neuron/utils/hub_cache_utils.py index 442e2abff..04fe439a2 100644 --- a/optimum/neuron/utils/hub_cache_utils.py +++ b/optimum/neuron/utils/hub_cache_utils.py @@ -482,6 +482,7 @@ def get_hub_cached_models( if registry_file.path.endswith(".json"): sub_paths = registry_file.path.split("/") if len(sub_paths) == len(root_sub_paths) + 4: + # Look at the last four splits, i.e. model_arch/model_org/model_name/SHA.json model_arch, model_org, model_name = sub_paths[-4:-1] cached_models.add((model_arch, model_org, model_name)) return cached_models