Skip to content

Commit

Permalink
replace some more references
Browse files Browse the repository at this point in the history
  • Loading branch information
renxida committed Nov 23, 2024
1 parent f630098 commit 0e8d414
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions shortfin/python/shortfin_apps/llm/components/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
import shortfin as sf
import shortfin.array as sfnp

from .cache import BasePagedAttentionCache, AttnPageEntry
from .kvcache.base_attention_cache import BasePagedAttentionCache
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from .kvcache.page_pool import PageInfo


class InferencePhase(Enum):
Expand Down Expand Up @@ -42,7 +46,7 @@ def __init__(self, phase: InferencePhase, input_token_ids: list[int]):

# Cache pages that have been locked for this request.
self._cache: BasePagedAttentionCache | None = None
self.locked_pages: list[AttnPageEntry] | None = None
self.locked_pages: list[PageInfo] | None = None

def reset(self, phase: InferencePhase):
"""Resets all per request state in preparation for an subsequent execution."""
Expand All @@ -69,14 +73,14 @@ def free_cache_pages(self):
cache.release_pages(self.input_token_ids, pages)

def lock_initial_cache_pages(
    self, cache: BasePagedAttentionCache, pages: list[PageInfo]
):
    """Bind this request to *cache* and take ownership of its first pages.

    Args:
        cache: The paged attention cache that allocated `pages`.
        pages: Initial cache pages locked for this request; stored on
            `self.locked_pages` for later release.

    Raises:
        AssertionError: If a cache is already attached to this request —
            a request may be bound to at most one cache at a time.
    """
    assert not self._cache
    self._cache = cache
    self.locked_pages = pages

def lock_new_cache_pages(
    self, cache: BasePagedAttentionCache, pages: list[PageInfo]
):
    """Append newly locked pages from the already-attached cache.

    Args:
        cache: Must be the same cache previously passed to
            `lock_initial_cache_pages` (identity-checked).
        pages: Additional pages to track; extended onto
            `self.locked_pages`.

    Raises:
        AssertionError: If *cache* is not the cache this request is
            bound to.
    """
    assert self._cache is cache
    self.locked_pages.extend(pages)
Expand Down

0 comments on commit 0e8d414

Please sign in to comment.