From 3e5209b477f46fbc8640d69f6c4c756fd2a28464 Mon Sep 17 00:00:00 2001
From: Marius Brehler <marius.brehler@amd.com>
Date: Mon, 28 Oct 2024 18:49:42 +0100
Subject: [PATCH 1/4] Harmonize license information and fix SPDX tag (#339)

---
 shortfin/CMakeLists.txt                            | 6 +++---
 shortfin/build_tools/cmake/shortfin_library.cmake  | 6 +++---
 shortfin/dev_me.py                                 | 6 +++---
 shortfin/python/CMakeLists.txt                     | 6 +++---
 shortfin/src/CMakeLists.txt                        | 6 +++---
 shortfin/src/shortfin/CMakeLists.txt               | 6 +++---
 shortfin/src/shortfin/array/CMakeLists.txt         | 6 +++---
 shortfin/src/shortfin/local/CMakeLists.txt         | 6 +++---
 shortfin/src/shortfin/local/systems/CMakeLists.txt | 6 +++---
 shortfin/src/shortfin/support/CMakeLists.txt       | 6 +++---
 10 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/shortfin/CMakeLists.txt b/shortfin/CMakeLists.txt
index 4e2648a87..b3c2ee24f 100644
--- a/shortfin/CMakeLists.txt
+++ b/shortfin/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 cmake_minimum_required(VERSION 3.29)
 
diff --git a/shortfin/build_tools/cmake/shortfin_library.cmake b/shortfin/build_tools/cmake/shortfin_library.cmake
index 26a31101b..872e24838 100644
--- a/shortfin/build_tools/cmake/shortfin_library.cmake
+++ b/shortfin/build_tools/cmake/shortfin_library.cmake
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 set(SHORTFIN_DEFAULT_COPTS
   # General clang and GCC options application to C and C++.
diff --git a/shortfin/dev_me.py b/shortfin/dev_me.py
index be02d67fa..ca6916767 100755
--- a/shortfin/dev_me.py
+++ b/shortfin/dev_me.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 # dev_me.py
 #
diff --git a/shortfin/python/CMakeLists.txt b/shortfin/python/CMakeLists.txt
index adf9d7879..d125416af 100644
--- a/shortfin/python/CMakeLists.txt
+++ b/shortfin/python/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 # shortfin publishes multiple python packages: - _shortfin: Trampoline
 # __init__.py which looks at environment variables to load an appropriate native
diff --git a/shortfin/src/CMakeLists.txt b/shortfin/src/CMakeLists.txt
index 5e7c1d8e5..e27318764 100644
--- a/shortfin/src/CMakeLists.txt
+++ b/shortfin/src/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 add_subdirectory(shortfin)
 
diff --git a/shortfin/src/shortfin/CMakeLists.txt b/shortfin/src/shortfin/CMakeLists.txt
index 1bea0003b..058e0e336 100644
--- a/shortfin/src/shortfin/CMakeLists.txt
+++ b/shortfin/src/shortfin/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 add_subdirectory(array)
 add_subdirectory(local)
diff --git a/shortfin/src/shortfin/array/CMakeLists.txt b/shortfin/src/shortfin/array/CMakeLists.txt
index d40eed23f..48ab33590 100644
--- a/shortfin/src/shortfin/array/CMakeLists.txt
+++ b/shortfin/src/shortfin/array/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 shortfin_cc_component(
   NAME
diff --git a/shortfin/src/shortfin/local/CMakeLists.txt b/shortfin/src/shortfin/local/CMakeLists.txt
index 9f51c78bb..250bd79a2 100644
--- a/shortfin/src/shortfin/local/CMakeLists.txt
+++ b/shortfin/src/shortfin/local/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 add_subdirectory(systems)
 
diff --git a/shortfin/src/shortfin/local/systems/CMakeLists.txt b/shortfin/src/shortfin/local/systems/CMakeLists.txt
index 3ec5f17a6..b2bcbef23 100644
--- a/shortfin/src/shortfin/local/systems/CMakeLists.txt
+++ b/shortfin/src/shortfin/local/systems/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 set(_SYSTEM_COMPONENTS)
 
diff --git a/shortfin/src/shortfin/support/CMakeLists.txt b/shortfin/src/shortfin/support/CMakeLists.txt
index 9cb0d2b45..cbf171894 100644
--- a/shortfin/src/shortfin/support/CMakeLists.txt
+++ b/shortfin/src/shortfin/support/CMakeLists.txt
@@ -1,8 +1,8 @@
 # Copyright 2024 Advanced Micro Devices, Inc.
 #
-# Licensed under the Apache License v2.0 with LLVM Exceptions. See
-# https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier:
-# Apache-2.0 WITH LLVM-exception
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 shortfin_cc_component(
   NAME

From fe56b308ca4d654d24556bc8a9a89c7c90bc67c5 Mon Sep 17 00:00:00 2001
From: Rob Suderman <rob.suderman@gmail.com>
Date: Mon, 28 Oct 2024 10:59:37 -0700
Subject: [PATCH 2/4] Addition of booleans is currently wrong in iree-compile
 (#341)

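Addition of boolean tensors is currently performed as XOR by
iree-compile, which breaks causal masking. Combine the masks with
`torch.logical_or` and build the numeric mask with `torch.where`
instead.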

---------

Co-authored-by: Xida Ren (Cedar) <cedar.ren@gmail.com>
---
 sharktank/sharktank/layers/causal_llm.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sharktank/sharktank/layers/causal_llm.py b/sharktank/sharktank/layers/causal_llm.py
index 63c58e860..7a09995a8 100644
--- a/sharktank/sharktank/layers/causal_llm.py
+++ b/sharktank/sharktank/layers/causal_llm.py
@@ -95,10 +95,9 @@ def input_mask(
 
     def decode_attention_mask(self, boolean_input_mask: torch.Tensor):
         dtype = self.attention_dtype
-        numeric_mask = torch.zeros_like(boolean_input_mask, dtype=dtype)
-        numeric_mask.masked_fill_(
-            boolean_input_mask, self._maximally_negative_value(dtype)
-        )
+        numeric_mask = torch.where(
+            boolean_input_mask, self._maximally_negative_value(dtype), 0
+        ).to(dtype)
         return numeric_mask.unsqueeze(1).unsqueeze(1).to(self.device)
 
     def attention_mask(
@@ -127,9 +126,10 @@ def attention_mask(
         dtype = self.attention_dtype
         _, batch_seq_len = input_mask.shape
         causal_mask = causal_context_mask[:, :, :batch_seq_len, :batch_seq_len]
-        boolean_mask = causal_mask + input_mask[:, None, None, :]
-        numeric_mask = torch.zeros_like(boolean_mask, dtype=dtype)
-        numeric_mask.masked_fill_(boolean_mask, self._maximally_negative_value(dtype))
+        boolean_mask = torch.logical_or(causal_mask, input_mask[:, None, None, :])
+        numeric_mask = torch.where(
+            boolean_mask, self._maximally_negative_value(dtype), 0
+        ).to(dtype)
         return numeric_mask.to(self.device)
 
     def extract_tokens_from_logits(

From 98392d0ee98803414142339c43e99b9f2a45a53c Mon Sep 17 00:00:00 2001
From: Boian Petkantchin <boian.petkantchin@amd.com>
Date: Mon, 28 Oct 2024 14:10:56 -0400
Subject: [PATCH 3/4] Fix difference of LLM export for the direct vs paged
 cache (#347)

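Until the work on unifying the cache interfaces lands, there are some
differences between the sharded, direct, and paged caches.
The direct cache uses a list of tensors for each transformer block,
while the paged cache has one slab and the sharded paged cache expects
a list of shards. Handle each cache type separately when setting up the
cache state for export, and only unbind the stacked cache tensor for
the unsharded direct cache.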
---
 .../sharktank/examples/export_paged_llm_v1.py | 49 +++++++++++--------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/sharktank/sharktank/examples/export_paged_llm_v1.py b/sharktank/sharktank/examples/export_paged_llm_v1.py
index ce6f0864a..0436c0008 100644
--- a/sharktank/sharktank/examples/export_paged_llm_v1.py
+++ b/sharktank/sharktank/examples/export_paged_llm_v1.py
@@ -116,35 +116,38 @@ def setup_cache(model, shard_count):
                 page_count=hp.context_length // llama_config.block_seq_stride
             )
             page_dim = torch.export.Dim("page")
+
             dynamic_shapes = [{0: page_dim}]
+            unpacked = cache_state
+            arg_affinities = {}
+            shard_dim = None
+
+            # Need to unpack the state when sharded
+            if llama_config.tensor_parallelism_size > 1:
+                shard_dim = cache_state[0].shard_dim
+
+                unpacked = [[shard._data for shard in cs.shards] for cs in cache_state]
+                dynamic_shapes = [
+                    [ds] * llama_config.tensor_parallelism_size for ds in dynamic_shapes
+                ]
+
+                for i in range(llama_config.tensor_parallelism_size):
+                    arg_affinities[i] = DeviceAffinity(str(i))
+
+            return unpacked, shard_dim, dynamic_shapes, arg_affinities
+
         elif model.config.kv_cache_type == "direct":
             cache_state = model.cache.allocate(bs=1)
             # Direct cache dimensions:
             #   2 * transformer_block_count of...
             #   [bs, seq_length, attn_head_count, attn_head_dim]
             dynamic_shapes = [None]
+            arg_affinities = {}
+            shard_dim = None
+            return torch.stack(cache_state), shard_dim, dynamic_shapes, arg_affinities
         else:
             raise NotImplementedError(f"Unsupported KV cache type: {type(model.cache)}")
 
-        unpacked = cache_state
-        dynamic_shapes = dynamic_shapes
-        arg_affinities = {}
-        shard_dim = None
-
-        # Need to unpacke that state when sharded
-        if llama_config.tensor_parallelism_size > 1:
-            shard_dim = cache_state[0].shard_dim
-
-            unpacked = [[shard._data for shard in cs.shards] for cs in cache_state]
-            dynamic_shapes = [
-                [ds] * llama_config.tensor_parallelism_size for ds in dynamic_shapes
-            ]
-
-            for i in range(llama_config.tensor_parallelism_size):
-                arg_affinities[i] = DeviceAffinity(str(i))
-
-        return torch.stack(unpacked), shard_dim, dynamic_shapes, arg_affinities
-
     def repack_cache(cache, shard_dim):
         return [SplitPrimitiveTensor(ts=c, shard_dim=shard_dim) for c in cache]
 
@@ -184,7 +187,13 @@ def generate_batch_prefill(bs: int):
             arg_device=arg_affinities,
         )
         def _(model, tokens, seq_lens, seq_block_ids, cs):
-            cache_tensors = torch.unbind(cs)
+            if (
+                model.config.tensor_parallelism_size == 1
+                and model.config.kv_cache_type == "direct"
+            ):
+                cache_tensors = torch.unbind(cs)
+            else:
+                cache_tensors = cs
 
             sl = tokens.shape[1]
             input_mask = model.input_mask(seq_lens, sl)

From f925a5bfc451509aae93e74b412b68b9d81d2310 Mon Sep 17 00:00:00 2001
From: Marius Brehler <marius.brehler@amd.com>
Date: Mon, 28 Oct 2024 19:34:19 +0100
Subject: [PATCH 4/4] Revise and split requirements files (#340)

Moves the dependencies defined in the root `requirements.txt` to
`sharktank/requirements.txt` and splits out test-only deps into
`sharktank/requirements-tests.txt`. Dependencies only used for
development or in the CI are moved to `requirements-dev.txt`, and the
root requirements file now just pulls in all of these files.
Furthermore, some dependencies that are no longer used are removed.
---
 .github/workflows/ci-sharktank.yml |  6 +++---
 .github/workflows/ci_eval.yaml     |  2 +-
 requirements-dev.txt               | 10 +++++++++
 requirements.txt                   | 34 ++++--------------------------
 sharktank/requirements-tests.txt   |  2 ++
 sharktank/requirements.txt         | 17 ++++++++++++++-
 sharktank/setup.py                 |  1 -
 7 files changed, 36 insertions(+), 36 deletions(-)
 create mode 100644 requirements-dev.txt

diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml
index ead663f53..73243086a 100644
--- a/.github/workflows/ci-sharktank.yml
+++ b/.github/workflows/ci-sharktank.yml
@@ -6,14 +6,14 @@ on:
     paths:
       - '.github/workflows/ci-sharktank.yml'
       - 'sharktank/**'
-      - '*requirements.txt'
+      - '*requirements*.txt'
   push:
     branches:
       - main
     paths:
       - '.github/workflows/ci-sharktank.yml'
       - 'sharktank/**'
-      - '*requirements.txt'
+      - '*requirements*.txt'
 
 concurrency:
   # A PR number if a pull request and otherwise the commit hash. This cancels
@@ -52,7 +52,7 @@ jobs:
         id: cache-pip
         with:
           path: ${{ env.PIP_CACHE_DIR }}
-          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }}
+          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }}
 
       - name: Install pip deps
         run: |
diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml
index 7288ed8ac..7b80bd61b 100644
--- a/.github/workflows/ci_eval.yaml
+++ b/.github/workflows/ci_eval.yaml
@@ -45,7 +45,7 @@ jobs:
         id: cache-pip
         with:
           path: ${{ env.PIP_CACHE_DIR }}
-          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }}
+          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }}
 
       - name: Install sharktank deps
         run: |
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 000000000..e736fe3bd
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,10 @@
+# Used for managing pre-commit flows.
+pre-commit
+
+# Type checking
+mypy==1.8.0
+types-requests==2.31.0.20240125
+
+# Testing
+pytest==8.0.0
+pytest-xdist==3.5.0
diff --git a/requirements.txt b/requirements.txt
index 0198314f8..cc2edf876 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,30 +1,4 @@
-# Runtime deps.
-gguf==0.6.0
-numpy==1.26.3
-onnx==1.15.0
-
-# Model deps.
-huggingface-hub==0.22.2
-transformers==4.40.0
-sentencepiece==0.2.0
-
-# It is expected that you have installed a PyTorch version/variant specific
-# to your needs, so we only include a minimum version spec.
-# TODO: Use a versioned release once 2.3.0 drops.
-torch>=2.3.0.dev1
-
-# Used for managing pre-commit flows.
-pre-commit
-
-# Type checking
-mypy==1.8.0
-types-requests==2.31.0.20240125
-
-# Testing
-parameterized
-pytest==8.0.0
-pytest-xdist==3.5.0
-
-# Serving deps.
-fastapi==0.112.2
-uvicorn==0.30.6
+-r sharktank/requirements.txt
+-r sharktank/requirements-tests.txt
+-r shortfin/requirements-tests.txt
+-r requirements-dev.txt
diff --git a/sharktank/requirements-tests.txt b/sharktank/requirements-tests.txt
index d7266a5e8..4be48fdde 100644
--- a/sharktank/requirements-tests.txt
+++ b/sharktank/requirements-tests.txt
@@ -1 +1,3 @@
 datasets==3.0.0
+parameterized
+pytest==8.0.0
diff --git a/sharktank/requirements.txt b/sharktank/requirements.txt
index 6b21f239f..ad231d524 100644
--- a/sharktank/requirements.txt
+++ b/sharktank/requirements.txt
@@ -1 +1,16 @@
-gguf
+# Runtime deps.
+gguf==0.6.0
+numpy==1.26.3
+
+# Model deps.
+huggingface-hub==0.22.2
+transformers==4.40.0
+datasets
+
+# It is expected that you have installed a PyTorch version/variant specific
+# to your needs, so we only include a minimum version spec.
+torch>=2.3.0
+
+# Serving deps.
+fastapi==0.112.2
+uvicorn==0.30.6
diff --git a/sharktank/setup.py b/sharktank/setup.py
index ab6e92d33..8ffcf3984 100644
--- a/sharktank/setup.py
+++ b/sharktank/setup.py
@@ -99,7 +99,6 @@ def initialize_options(self):
     extras_require={
         "testing": [
             f"pytest{get_version_spec('pytest')}",
-            f"pytest-xdist{get_version_spec('pytest-xdist')}",
         ],
     },
     cmdclass={"build": BuildCommand},