Handle cos-sin cache in every forward in case of long-context + LoRA
SanjuCSudhakaran committed Jan 2, 2025
1 parent 99c89da · commit c52c37e
Showing 1 changed file with 4 additions and 1 deletion.
vllm/model_executor/layers/rotary_embedding.py: 4 additions & 1 deletion
@@ -232,7 +232,10 @@ def forward_hpu(
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         from habana_frameworks.torch.hpex.kernels import (
             RotaryPosEmbeddingMode, apply_rotary_pos_emb)
-        if not hasattr(self, "sin") or self.sin is None or offsets is not None:
+
+        # Prepare cos-sin caches for long-context + LoRA with offsets for every
+        # forward, since the offset information wasn't available previously
+        if hasattr(self, "scaling_factors") or self.sin is None:
             self.prepare_cos_sin(positions, offsets)
         num_tokens = positions.shape[0] * positions.shape[1]
         # HPU RoPE kernel requires hidden dimension for cos and sin to be equal
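
For illustration, here is a minimal standalone sketch of the guard behavior after this change, assuming that `scaling_factors` is only set for the long-context (linear-scaling) RoPE variant. The `RotarySketch` class and its `prepare_cos_sin` body are simplified stand-ins, not the actual vLLM implementation.

# Minimal sketch of the patched guard logic (hypothetical class, not vLLM's).
from typing import List, Optional, Tuple

import torch


class RotarySketch:

    def __init__(self, scaling_factors: Optional[List[float]] = None) -> None:
        if scaling_factors is not None:
            # Long-context RoPE: the cache depends on per-request LoRA offsets,
            # so it has to be refreshed on every forward call.
            self.scaling_factors = scaling_factors
        self.cos: Optional[torch.Tensor] = None
        self.sin: Optional[torch.Tensor] = None

    def prepare_cos_sin(self, positions: torch.Tensor,
                        offsets: Optional[torch.Tensor]) -> None:
        # Simplified cache build; the real code derives angles from inv_freq
        # and the configured scaling factors.
        idx = positions if offsets is None else positions + offsets
        angles = idx.float().unsqueeze(-1)
        self.cos, self.sin = torch.cos(angles), torch.sin(angles)

    def forward(
        self,
        positions: torch.Tensor,
        offsets: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        # Patched condition: rebuild on every call when scaling_factors exists
        # (long-context + LoRA may pass different offsets each time);
        # otherwise build the cache lazily on the first call only.
        if hasattr(self, "scaling_factors") or self.sin is None:
            self.prepare_cos_sin(positions, offsets)
        return self.cos, self.sin


# Example: with scaling factors set, the cache is rebuilt on each forward,
# so later calls that carry offsets are reflected in cos/sin.
rope = RotarySketch(scaling_factors=[4.0])
positions = torch.arange(4).unsqueeze(0)            # shape [1, 4]
cos1, sin1 = rope.forward(positions)                 # no offsets yet
cos2, sin2 = rope.forward(positions, offsets=torch.ones_like(positions))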
