From 37e5e57e3aa717c45f2a8de2f7ca267147c9b425 Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Wed, 12 Feb 2025 15:59:08 +0000 Subject: [PATCH 1/3] feat(cache): get the list of cached models This adds a method to get the list of cached models for the current optimum version as a set of (model_arch, model_org, model_name). --- optimum/neuron/utils/__init__.py | 16 +++++++-- optimum/neuron/utils/hub_cache_utils.py | 44 +++++++++++++++++++++++++ tests/cache/test_neuronx_cache.py | 5 ++- 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/optimum/neuron/utils/__init__.py b/optimum/neuron/utils/__init__.py index 5f8b7fca0..1a4179f26 100644 --- a/optimum/neuron/utils/__init__.py +++ b/optimum/neuron/utils/__init__.py @@ -32,7 +32,13 @@ "ENCODER_NAME", "NEURON_FILE_NAME", ], - "hub_cache_utils": ["ModelCacheEntry", "get_hub_cached_entries", "hub_neuronx_cache", "synchronize_hub_cache"], + "hub_cache_utils": [ + "ModelCacheEntry", + "get_hub_cached_entries", + "get_hub_cached_models", + "hub_neuronx_cache", + "synchronize_hub_cache", + ], "import_utils": [ "is_accelerate_available", "is_neuron_available", @@ -94,7 +100,13 @@ ENCODER_NAME, NEURON_FILE_NAME, ) - from .hub_cache_utils import ModelCacheEntry, get_hub_cached_entries, hub_neuronx_cache, synchronize_hub_cache + from .hub_cache_utils import ( + ModelCacheEntry, + get_hub_cached_entries, + get_hub_cached_models, + hub_neuronx_cache, + synchronize_hub_cache, + ) from .import_utils import ( is_accelerate_available, is_neuron_available, diff --git a/optimum/neuron/utils/hub_cache_utils.py b/optimum/neuron/utils/hub_cache_utils.py index ad2add627..9f8c21bbe 100644 --- a/optimum/neuron/utils/hub_cache_utils.py +++ b/optimum/neuron/utils/hub_cache_utils.py @@ -25,6 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union from huggingface_hub import HfApi, get_token +from huggingface_hub.errors import EntryNotFoundError from huggingface_hub.hf_api import RepoFile from transformers import 
AutoConfig, PretrainedConfig @@ -33,6 +34,7 @@ from .import_utils import is_neuronx_available from .patching import patch_everywhere from .require_utils import requires_torch_neuronx +from .version_utils import get_neuronxcc_version if is_neuronx_available(): @@ -447,6 +449,48 @@ def get_hub_cached_entries( return model_entries +def get_hub_cached_models( + mode: Union[Literal["training"], Literal["inference"], Mode], cache_repo_id: Optional[str] = None +): + """Get the list of cached models for the specified mode for the current version + + Args: + mode (`Union[Literal["training"], Literal["inference"], Mode]`): the cache mode (inference or training). + cache_repo_id (`Optional[str]`): the path to a cache repo id if different from the default one. + Returns: + A set of (model_arch, model_org, model_name) + """ + if cache_repo_id is None: + cache_repo_id = get_hf_hub_cache_repo() + registry_folder = get_registry_folder_for_mode(mode) + api = HfApi() + root = api.list_repo_tree(cache_repo_id, path_in_repo="", recursive=False) + for root_file in root: + compiler_pattern = "neuronxcc-" + if is_neuronx_available(): + # If we know the current compiler we can avoid going through all of them in the hub cache + compiler_pattern += get_neuronxcc_version() + if root_file.path.startswith(compiler_pattern): + # Look for a registry of cached models for the current optimum-version + path_in_repo = root_file.path + "/" + registry_folder + root_sub_paths = path_in_repo.split("/") + try: + registry = api.list_repo_tree(cache_repo_id, path_in_repo=path_in_repo, recursive=True) + cached_models = set({}) + for registry_file in registry: + # Extract each cached model as a tuple of (arch, org, model) + if registry_file.path.endswith(".json"): + sub_paths = registry_file.path.split("/") + if len(sub_paths) == len(root_sub_paths) + 4: + model_arch, model_org, model_name = sub_paths[-4:-1] + cached_models.add((model_arch, model_org, model_name)) + return cached_models + except 
EntryNotFoundError: + # No cached models for the current version + continue + return set({}) + + def _prepare_config_for_matching(entry_config: Dict, target_entry: ModelCacheEntry, model_type: str): if model_type == "stable-diffusion": # Remove neuron config for comparison as the target does not have it diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py index 8066dc8cc..d71d065d2 100644 --- a/tests/cache/test_neuronx_cache.py +++ b/tests/cache/test_neuronx_cache.py @@ -32,7 +32,7 @@ NeuronStableDiffusionPipeline, NeuronStableDiffusionXLPipeline, ) -from optimum.neuron.utils import get_hub_cached_entries, synchronize_hub_cache +from optimum.neuron.utils import get_hub_cached_entries, get_hub_cached_models, synchronize_hub_cache from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx @@ -198,6 +198,9 @@ def test_decoder_cache(cache_repos): model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id) assert len(model_entries) == 1 assert model_entries[0] == model.config.neuron + # Also verify that the model appears in the list of cached models + cached_models = get_hub_cached_models("inference") + assert ("gpt2", "hf-internal-testing", "tiny-random-gpt2") in cached_models # Clear the local cache for root, dirs, files in os.walk(cache_path): for f in files: From 991506efec86e4c1e1ed3597323586712bd62eba Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Mon, 17 Feb 2025 10:28:22 +0100 Subject: [PATCH 2/3] review: Apply suggestions Co-authored-by: Michael Benayoun --- optimum/neuron/utils/hub_cache_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/neuron/utils/hub_cache_utils.py b/optimum/neuron/utils/hub_cache_utils.py index 9f8c21bbe..442e2abff 100644 --- a/optimum/neuron/utils/hub_cache_utils.py +++ b/optimum/neuron/utils/hub_cache_utils.py @@ -476,7 +476,7 @@ def get_hub_cached_models( root_sub_paths = path_in_repo.split("/") try: registry 
= api.list_repo_tree(cache_repo_id, path_in_repo=path_in_repo, recursive=True) - cached_models = set({}) + cached_models = set() for registry_file in registry: # Extract each cached model as a tuple of (arch, org, model) if registry_file.path.endswith(".json"): @@ -488,7 +488,7 @@ def get_hub_cached_models( except EntryNotFoundError: # No cached models for the current version continue - return set({}) + return set() def _prepare_config_for_matching(entry_config: Dict, target_entry: ModelCacheEntry, model_type: str): From 356f2704e813ebc76bc9a7473d51071dc5b7d9f9 Mon Sep 17 00:00:00 2001 From: David Corvoysier Date: Mon, 17 Feb 2025 10:31:47 +0100 Subject: [PATCH 3/3] review: explain file pattern --- optimum/neuron/utils/hub_cache_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/neuron/utils/hub_cache_utils.py b/optimum/neuron/utils/hub_cache_utils.py index 442e2abff..04fe439a2 100644 --- a/optimum/neuron/utils/hub_cache_utils.py +++ b/optimum/neuron/utils/hub_cache_utils.py @@ -482,6 +482,7 @@ def get_hub_cached_models( if registry_file.path.endswith(".json"): sub_paths = registry_file.path.split("/") if len(sub_paths) == len(root_sub_paths) + 4: + # Look at the last four splits, i.e. model_arch/model_org/model_name/SHA.json model_arch, model_org, model_name = sub_paths[-4:-1] cached_models.add((model_arch, model_org, model_name)) return cached_models