Skip to content

Commit

Permalink
Merge branch 'main' into llm-server-int-test
Browse files Browse the repository at this point in the history
  • Loading branch information
stbaione authored Nov 5, 2024
2 parents ee675ce + 85220d1 commit 5876c37
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
4 changes: 2 additions & 2 deletions sharktank/sharktank/serving_poc/framework/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def __init__(self, session: DeviceSession, index: int = 0):
self._semaphore = session.device.create_semaphore(0)
self._step = 0

def execute_sequential(self, command_buffers: list[HalCommandBuffer]):
def execute_sequential(self, command_buffer: HalCommandBuffer):
"""Executes a single command buffer at the current step, advancing to the
next.
"""
Expand All @@ -329,7 +329,7 @@ def execute_sequential(self, command_buffers: list[HalCommandBuffer]):
self._step = next_step
sem = self._semaphore
self._device.queue_execute(
command_buffers, [(sem, current_step)], [(sem, next_step)]
command_buffer, [(sem, current_step)], [(sem, next_step)]
)

def current_fence(self) -> HalFence:
Expand Down
4 changes: 2 additions & 2 deletions sharktank/sharktank/serving_poc/llm/impl/service_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ async def prefill(self) -> TimelineGuarded[HalBufferView]:

# Perform h2d transfers.
cb.end()
work_queue.execute_sequential([cb])
work_queue.execute_sequential(cb)

# Inputs:
# token_ids
Expand Down Expand Up @@ -468,7 +468,7 @@ async def decode(self) -> TimelineGuarded[HalBufferView]:

# Perform h2d transfers.
cb.end()
work_queue.execute_sequential([cb])
work_queue.execute_sequential(cb)

# Inputs:
# token_ids
Expand Down
3 changes: 3 additions & 0 deletions shortfin/docs/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ Array
.. autoclass:: base_array
.. autoclass:: device_array
:members:
.. autoclass:: RandomGenerator

.. autofunction:: _shortfin_default.lib.array.fill_randn
.. autofunction:: _shortfin_default.lib.array.argmax

Local
Expand Down Expand Up @@ -52,6 +54,7 @@ AMD GPU
^^^^^^^
.. automodule:: _shortfin_default.lib.local.amdgpu
.. autoclass:: SystemBuilder
:members:
.. autoclass:: AMDGPUDevice

Host
Expand Down

0 comments on commit 5876c37

Please sign in to comment.