Skip to content

Commit

Permalink
replace some more references
Browse files Browse the repository at this point in the history
  • Loading branch information
renxida committed Nov 23, 2024
1 parent f630098 commit 0e8d414
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions shortfin/python/shortfin_apps/llm/components/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
import shortfin as sf
import shortfin.array as sfnp

from .cache import BasePagedAttentionCache, AttnPageEntry
from .kvcache.base_attention_cache import BasePagedAttentionCache
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from .kvcache.page_pool import PageInfo


class InferencePhase(Enum):
Expand Down Expand Up @@ -42,7 +46,7 @@ def __init__(self, phase: InferencePhase, input_token_ids: list[int]):

# Cache pages that have been locked for this request.
self._cache: BasePagedAttentionCache | None = None
self.locked_pages: list[AttnPageEntry] | None = None
self.locked_pages: list[PageInfo] | None = None

def reset(self, phase: InferencePhase):
"""Resets all per request state in preparation for an subsequent execution."""
Expand All @@ -69,14 +73,14 @@ def free_cache_pages(self):
cache.release_pages(self.input_token_ids, pages)

def lock_initial_cache_pages(
    self, cache: BasePagedAttentionCache, pages: list[PageInfo]
):
    """Bind this request to *cache* and take ownership of its first pages.

    Args:
        cache: The paged attention cache that allocated `pages`.
        pages: Initial cache pages locked for this request; stored on
            `self.locked_pages` for later release.

    Raises:
        AssertionError: If a cache is already attached to this request —
            a request may be bound to at most one cache at a time.
    """
    assert not self._cache
    self._cache = cache
    self.locked_pages = pages

def lock_new_cache_pages(
    self, cache: BasePagedAttentionCache, pages: list[PageInfo]
):
    """Append newly locked pages from the already-attached cache.

    Args:
        cache: Must be the same cache previously passed to
            `lock_initial_cache_pages` (identity-checked).
        pages: Additional pages to track; extended onto
            `self.locked_pages`.

    Raises:
        AssertionError: If *cache* is not the cache this request is
            bound to.
    """
    assert self._cache is cache
    self.locked_pages.extend(pages)
Expand Down

0 comments on commit 0e8d414

Please sign in to comment.