Add a method to get the list of cached models #784

Merged 3 commits on Feb 17, 2025
16 changes: 14 additions & 2 deletions optimum/neuron/utils/__init__.py
@@ -32,7 +32,13 @@
"ENCODER_NAME",
"NEURON_FILE_NAME",
],
"hub_cache_utils": ["ModelCacheEntry", "get_hub_cached_entries", "hub_neuronx_cache", "synchronize_hub_cache"],
"hub_cache_utils": [
"ModelCacheEntry",
"get_hub_cached_entries",
"get_hub_cached_models",
"hub_neuronx_cache",
"synchronize_hub_cache",
],
"import_utils": [
"is_accelerate_available",
"is_neuron_available",
@@ -94,7 +100,13 @@
        ENCODER_NAME,
        NEURON_FILE_NAME,
    )
    from .hub_cache_utils import ModelCacheEntry, get_hub_cached_entries, hub_neuronx_cache, synchronize_hub_cache
    from .hub_cache_utils import (
        ModelCacheEntry,
        get_hub_cached_entries,
        get_hub_cached_models,
        hub_neuronx_cache,
        synchronize_hub_cache,
    )
    from .import_utils import (
        is_accelerate_available,
        is_neuron_available,
45 changes: 45 additions & 0 deletions optimum/neuron/utils/hub_cache_utils.py
@@ -25,6 +25,7 @@
from typing import Any, Dict, List, Literal, Optional, Union

from huggingface_hub import HfApi, get_token
from huggingface_hub.errors import EntryNotFoundError
from huggingface_hub.hf_api import RepoFile
from transformers import AutoConfig, PretrainedConfig

@@ -33,6 +34,7 @@
from .import_utils import is_neuronx_available
from .patching import patch_everywhere
from .require_utils import requires_torch_neuronx
from .version_utils import get_neuronxcc_version


if is_neuronx_available():
@@ -447,6 +449,49 @@ def get_hub_cached_entries(
    return model_entries


def get_hub_cached_models(
    mode: Union[Literal["training"], Literal["inference"], Mode], cache_repo_id: Optional[str] = None
):
    """Get the list of cached models for the specified mode and the current version.

    Args:
        mode (`Union[Literal["training"], Literal["inference"], Mode]`): the cache mode (inference or training).
        cache_repo_id (`Optional[str]`): the path to a cache repo id if different from the default one.
    Returns:
        A set of (model_arch, model_org, model_name) tuples.
    """
    if cache_repo_id is None:
        cache_repo_id = get_hf_hub_cache_repo()
    registry_folder = get_registry_folder_for_mode(mode)
    api = HfApi()
    root = api.list_repo_tree(cache_repo_id, path_in_repo="", recursive=False)
    for root_file in root:
        compiler_pattern = "neuronxcc-"
        if is_neuronx_available():
            # If we know the current compiler we can avoid going through all of them in the hub cache
            compiler_pattern += get_neuronxcc_version()
        if root_file.path.startswith(compiler_pattern):
            # Look for a registry of cached models for the current optimum version
            path_in_repo = root_file.path + "/" + registry_folder
            root_sub_paths = path_in_repo.split("/")
            try:
                registry = api.list_repo_tree(cache_repo_id, path_in_repo=path_in_repo, recursive=True)
                cached_models = set()
                for registry_file in registry:
                    # Extract each cached model as a tuple of (arch, org, model)
                    if registry_file.path.endswith(".json"):
                        sub_paths = registry_file.path.split("/")
                        if len(sub_paths) == len(root_sub_paths) + 4:
                            # Look at the last four splits, i.e. model_arch/model_org/model_name/SHA.json
                            model_arch, model_org, model_name = sub_paths[-4:-1]
                            cached_models.add((model_arch, model_org, model_name))
                return cached_models
            except EntryNotFoundError:
                # No cached models for the current version
                continue
    return set()


def _prepare_config_for_matching(entry_config: Dict, target_entry: ModelCacheEntry, model_type: str):
    if model_type == "stable-diffusion":
        # Remove neuron config for comparison as the target does not have it
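For context, here is a minimal usage sketch of the new helper; the custom cache repo id and the printed output are illustrative assumptions, not part of this PR:

from optimum.neuron.utils import get_hub_cached_models

# Models cached for inference in the default hub cache repo,
# returned as a set of (model_arch, model_org, model_name) tuples.
cached = get_hub_cached_models("inference")
for model_arch, model_org, model_name in sorted(cached):
    print(f"{model_arch}: {model_org}/{model_name}")

# A non-default cache repo can be queried explicitly (hypothetical repo id).
cached = get_hub_cached_models("inference", cache_repo_id="my-org/optimum-neuron-cache")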
5 changes: 4 additions & 1 deletion tests/cache/test_neuronx_cache.py
@@ -32,7 +32,7 @@
    NeuronStableDiffusionPipeline,
    NeuronStableDiffusionXLPipeline,
)
from optimum.neuron.utils import get_hub_cached_entries, synchronize_hub_cache
from optimum.neuron.utils import get_hub_cached_entries, get_hub_cached_models, synchronize_hub_cache
from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx


@@ -198,6 +198,9 @@ def test_decoder_cache(cache_repos):
    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
    assert len(model_entries) == 1
    assert model_entries[0] == model.config.neuron
    # Also verify that the model appears in the list of cached models
    cached_models = get_hub_cached_models("inference")
    assert ("gpt2", "hf-internal-testing", "tiny-random-gpt2") in cached_models
    # Clear the local cache
    for root, dirs, files in os.walk(cache_path):
        for f in files:
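To see why the test expects the tuple ("gpt2", "hf-internal-testing", "tiny-random-gpt2"), here is a self-contained sketch of the path parsing that get_hub_cached_models performs; the compiler folder and registry folder names are hypothetical, only the slicing mirrors the code in this PR:

# Hypothetical registry entry path inside the hub cache repo:
#   <compiler_folder>/<registry_folder>/<model_arch>/<model_org>/<model_name>/<SHA>.json
registry_root = "neuronxcc-2.15.128.0+56dc5a86/0.0.28/inference"  # assumed layout
entry = registry_root + "/gpt2/hf-internal-testing/tiny-random-gpt2/abc123.json"

root_sub_paths = registry_root.split("/")
sub_paths = entry.split("/")
# Exactly four components below the registry root: arch/org/name/SHA.json
assert len(sub_paths) == len(root_sub_paths) + 4
model_arch, model_org, model_name = sub_paths[-4:-1]
print((model_arch, model_org, model_name))  # ('gpt2', 'hf-internal-testing', 'tiny-random-gpt2')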