
Adding support for registration of non-transformer models like SwiftKV in QEfficient #291


Closed
wants to merge 140 commits into from
909189d
added initial version of SwiftKV for AI 100
ochougul Dec 16, 2024
ef47eb9
BUGFIX
ochougul Dec 16, 2024
f43b345
BUGFIX
ochougul Dec 16, 2024
5fbc10b
BUGFIX
ochougul Dec 16, 2024
a6a3727
BUGFIX
ochougul Dec 16, 2024
5c094e2
BUGFIX
ochougul Dec 16, 2024
5259873
BUGFIX
ochougul Dec 16, 2024
39034c8
BUGFIX
ochougul Dec 17, 2024
cd01714
BUGFIX
ochougul Dec 17, 2024
a9539bf
BUGFIX
ochougul Dec 17, 2024
4bafed0
BUGFIX
ochougul Dec 17, 2024
c015d63
BUGFIX
ochougul Dec 17, 2024
b35cdd4
all bugfixes in
ochougul Dec 19, 2024
c5914a5
added init file
ochougul Dec 19, 2024
23df777
all changes except BQA are in with this
ochougul Jan 9, 2025
f7bad4b
more updates
ochougul Feb 5, 2025
2a37e62
Enabling the SwiftKV model in the QEFF Infra
quic-hemagnih Feb 27, 2025
b280225
rebased
ochougul Feb 27, 2025
9f5bca6
moving registration of non transformer models during initialization o…
quic-hemagnih Feb 27, 2025
991e3bf
fixed lint warnings
quic-hemagnih Mar 4, 2025
f384533
enabling faster downloads via hf_transfer (#282)
ochougul Feb 28, 2025
aa79836
upgrading from yanked version (#276)
ochougul Feb 28, 2025
d5c5179
Added example script for InternVL (#269)
quic-dhirajku Feb 28, 2025
6fc7bb6
prompt-lookup decoding example (#235)
eplatero97 Feb 28, 2025
5757301
New format of Documentation (#240)
quic-amitraj Feb 28, 2025
47577f8
Removed warning and override of mxfp6 for internal use (#277)
quic-amitraj Feb 28, 2025
87d8781
Added support of 2qpcs for internvl and llava (#279)
mohiso22 Feb 28, 2025
d1e60b7
Removed onnx_defer_loading flag. (#295)
shubhagr-quic Mar 3, 2025
da1d1da
Code for SDK configs Inclusion (#203)
abukhoy Mar 3, 2025
4b373b8
Fixed the compilation errors
quic-hemagnih Mar 5, 2025
8fbc881
Fixed the lint error
quic-hemagnih Mar 5, 2025
40d921a
fixed ruff errors
quic-hemagnih Mar 5, 2025
7598ec7
fixed ruff errors
quic-hemagnih Mar 5, 2025
a822f39
Address review comments
quic-hemagnih Mar 12, 2025
4e80fe8
added initial version of SwiftKV for AI 100
ochougul Dec 16, 2024
4168b33
BUGFIX
ochougul Dec 16, 2024
180a9e7
BUGFIX
ochougul Dec 16, 2024
2aeded3
BUGFIX
ochougul Dec 16, 2024
95a11a2
BUGFIX
ochougul Dec 16, 2024
0d8f1da
BUGFIX
ochougul Dec 16, 2024
025017b
BUGFIX
ochougul Dec 16, 2024
f559ad9
BUGFIX
ochougul Dec 17, 2024
f4b5d6e
BUGFIX
ochougul Dec 17, 2024
f26842a
BUGFIX
ochougul Dec 17, 2024
1b9b914
BUGFIX
ochougul Dec 17, 2024
39b1dd2
BUGFIX
ochougul Dec 17, 2024
2785355
all bugfixes in
ochougul Dec 19, 2024
2b51947
added init file
ochougul Dec 19, 2024
59c30a9
all changes except BQA are in with this
ochougul Jan 9, 2025
8cac6b9
more updates
ochougul Feb 5, 2025
4e63fac
Enabling the SwiftKV model in the QEFF Infra
quic-hemagnih Feb 27, 2025
78e9257
rebased
ochougul Feb 27, 2025
d33c22e
moving registration of non transformer models during initialization o…
quic-hemagnih Feb 27, 2025
ca2870f
fixed lint warnings
quic-hemagnih Mar 4, 2025
6665a3a
enabling faster downloads via hf_transfer (#282)
ochougul Feb 28, 2025
01d9a87
Fixed the compilation errors
quic-hemagnih Mar 5, 2025
5be5afa
Fixed the lint error
quic-hemagnih Mar 5, 2025
68e92ab
fixed ruff errors
quic-hemagnih Mar 5, 2025
aff64ab
fixed ruff errors
quic-hemagnih Mar 5, 2025
6fa6c9a
Address review comments
quic-hemagnih Mar 12, 2025
0ef8f61
Rebased and fixed the lint errors
quic-hemagnih Mar 12, 2025
29da089
rebased
quic-hemagnih Mar 12, 2025
5217976
Fix the lint errors
quic-hemagnih Mar 12, 2025
faef011
[QEff. Finetune] : Use login_and_download_hf_lm in finetuning path (#…
quic-mamta Jan 22, 2025
680a25b
Installing python package rich to resolve QNN tests failure. (#241)
shubhagr-quic Jan 24, 2025
b5cb9b4
Removed onnx_defer_loading from Immutable Convertor Args. (#230)
shubhagr-quic Jan 27, 2025
892f2a7
Porting hf_token fix (#246)
asmigosw Jan 27, 2025
d6d9a77
[Attention output Reshape] : Issue fixed (#243)
abukhoy Jan 31, 2025
7cc1b34
[QEff. Finetune] Stop fine tuning when loss has converged (#257)
quic-swatia Feb 11, 2025
963986b
Mllama(single + dual) + InternVL(single) + Llava (single) (#267)
ochougul Feb 14, 2025
725a7c1
Migrating HL compile and export to infer APIs (#214)
asmigosw Feb 17, 2025
5e37dfc
Hotfix-1 for Intern model (#270)
quic-amitraj Feb 18, 2025
f68dd8d
[Readme Update] : Deepseek Distills Models Added (#263)
abukhoy Feb 18, 2025
b3bb4be
Enabling FP8 models for `Cloud AI 100` (#248)
ochougul Feb 18, 2025
1d234f5
Added example script to use embedding model (#237)
quic-amitraj Feb 18, 2025
18b0c4d
Add prompt_to_lora_id_mapping adjustment in fix_prompts() (#242)
quic-jouachen Feb 19, 2025
b82c27a
Add support for model ibm-granite/granite-3.1-8b-instruct (#239)
quic-akuruvil Feb 19, 2025
5ea09e5
HOTFIX/fixed replace quantizers (#273)
ochougul Feb 19, 2025
eded9a9
HOTFIX/compiler arguments fix for VLM (#274)
quic-amitraj Feb 21, 2025
546c434
Support for Prefix caching Feature in QNN Compilation Path. (#262)
shubhagr-quic Feb 21, 2025
68e4ab7
HOTFIX/kv_offload fix (#278)
quic-amitraj Feb 24, 2025
c5c5bfd
Onboarding Whisper with Single QPC (#271)
kdulla Feb 24, 2025
6c7157d
Updated mos to optional argument (#281)
asmigosw Feb 25, 2025
5ef6c7e
[QEff Finetune] change default dropout in lora to 0.0 (#284)
vbaddi Feb 26, 2025
6534869
Updating the code owners list (#288)
quic-hemagnih Feb 27, 2025
493a8e2
Revert "Installing python package rich to resolve QNN tests failure."…
shubhagr-quic Feb 27, 2025
43af9f6
enabling faster downloads via hf_transfer (#282)
ochougul Feb 28, 2025
53c3564
upgrading from yanked version (#276)
ochougul Feb 28, 2025
bbfc4de
Added example script for InternVL (#269)
quic-dhirajku Feb 28, 2025
f1aa984
prompt-lookup decoding example (#235)
eplatero97 Feb 28, 2025
46e28a0
New format of Documentation (#240)
quic-amitraj Feb 28, 2025
e7796a4
Removed warning and override of mxfp6 for internal use (#277)
quic-amitraj Feb 28, 2025
b9a74cf
Added support of 2qpcs for internvl and llava (#279)
mohiso22 Feb 28, 2025
756e729
Removed onnx_defer_loading flag. (#295)
shubhagr-quic Mar 3, 2025
5f2bd31
Code for SDK configs Inclusion (#203)
abukhoy Mar 3, 2025
a45b5c4
Docs string added for the Image class and granite models are added in…
abukhoy Mar 6, 2025
a276806
[Bug-Fix :] QEFFAutoModelForCausalLM __repr__() Method Fixed (#307)
abukhoy Mar 6, 2025
d88e124
added initial version of SwiftKV for AI 100
ochougul Dec 16, 2024
860ac4f
BUGFIX
ochougul Dec 16, 2024
d0f7479
BUGFIX
ochougul Dec 16, 2024
c644856
BUGFIX
ochougul Dec 16, 2024
5511107
BUGFIX
ochougul Dec 16, 2024
0089540
BUGFIX
ochougul Dec 16, 2024
02b48ff
BUGFIX
ochougul Dec 16, 2024
757f10a
BUGFIX
ochougul Dec 17, 2024
16cd029
BUGFIX
ochougul Dec 17, 2024
ee36aa1
BUGFIX
ochougul Dec 17, 2024
4203a07
BUGFIX
ochougul Dec 17, 2024
ee2f7e1
BUGFIX
ochougul Dec 17, 2024
80730dd
all bugfixes in
ochougul Dec 19, 2024
4b07373
added init file
ochougul Dec 19, 2024
e34d79a
all changes except BQA are in with this
ochougul Jan 9, 2025
ed909a9
more updates
ochougul Feb 5, 2025
4e4300d
Enabling the SwiftKV model in the QEFF Infra
quic-hemagnih Feb 27, 2025
0684de3
rebased
ochougul Feb 27, 2025
9fa21da
moving registration of non transformer models during initialization o…
quic-hemagnih Feb 27, 2025
cb3b0ba
fixed lint warnings
quic-hemagnih Mar 4, 2025
5d9d1e5
enabling faster downloads via hf_transfer (#282)
ochougul Feb 28, 2025
2ca9360
Fixed the compilation errors
quic-hemagnih Mar 5, 2025
0f5cfcf
Fixed the lint error
quic-hemagnih Mar 5, 2025
c1f8a6b
fixed ruff errors
quic-hemagnih Mar 5, 2025
dc059e4
fixed ruff errors
quic-hemagnih Mar 5, 2025
4fdebc7
Address review comments
quic-hemagnih Mar 12, 2025
5b04f83
Rebased and fixed the lint errors
quic-hemagnih Mar 12, 2025
393c428
added initial version of SwiftKV for AI 100
ochougul Dec 16, 2024
273258e
BUGFIX
ochougul Dec 16, 2024
8d43032
BUGFIX
ochougul Dec 17, 2024
339ce89
Enabling the SwiftKV model in the QEFF Infra
quic-hemagnih Feb 27, 2025
b24399b
rebased
ochougul Feb 27, 2025
98b5b61
moving registration of non transformer models during initialization o…
quic-hemagnih Feb 27, 2025
00abd98
fixed lint warnings
quic-hemagnih Mar 4, 2025
8396903
enabling faster downloads via hf_transfer (#282)
ochougul Feb 28, 2025
4a5cd48
Fixed the compilation errors
quic-hemagnih Mar 5, 2025
6abceb6
Fixed the lint error
quic-hemagnih Mar 5, 2025
d026845
fixed ruff errors
quic-hemagnih Mar 5, 2025
7010a75
fixed ruff errors
quic-hemagnih Mar 5, 2025
582c1d4
Address review comments
quic-hemagnih Mar 12, 2025
cc895b7
Fix the lint errors
quic-hemagnih Mar 12, 2025
0726e75
rebased and fixed lint erros
quic-hemagnih Mar 12, 2025
01ab5bd
Merge branch 'supp_new_model' of github.com:quic-hemagnih/efficient-t…
quic-hemagnih Mar 12, 2025
20 changes: 19 additions & 1 deletion QEfficient/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
# Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------
@@ -12,8 +12,26 @@
# hf_transfer is imported (will happen on line 15 via leading imports)
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from transformers import AutoConfig

from QEfficient.transformers.modeling_utils import (
    MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS,
    get_auto_model_class,
    get_model_class_type_from_model_type,
)
from QEfficient.utils.logging_utils import logger

# Loop over all model types that are not present in transformers and register them
for model_type, model_cls in MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS.items():
    # Register the config class for this model type; it is the first element of the tuple
    AutoConfig.register(model_type, model_cls[0])

    model_class_type = get_model_class_type_from_model_type(model_type)
    AutoModelClassName = get_auto_model_class(model_class_type, model_cls[1])

    # Register the non-transformer model class and its config class with the resolved AutoModel class
    AutoModelClassName.register(model_cls[0], model_cls[1])


def check_qaic_sdk():
    """Check if QAIC SDK is installed"""
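The registration loop in `QEfficient/__init__.py` above follows a mapping-driven pattern: one dictionary drives both config and model registration. A minimal stand-alone sketch of that pattern, using plain dicts and hypothetical `Tiny*` classes in place of `AutoConfig`/`AutoModelForCausalLM` registration:

```python
# Minimal sketch of the mapping-driven registration pattern used above.
# TinyConfig, TinyForCausalLM, and the two registries are hypothetical
# stand-ins for transformers' AutoConfig/AutoModel machinery.

CONFIG_REGISTRY = {}  # model_type -> config class
MODEL_REGISTRY = {}   # config class -> model architecture class


class TinyConfig:
    model_type = "tiny_swiftkv"


class TinyForCausalLM:
    def __init__(self, config):
        self.config = config


MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS = {"tiny_swiftkv": [TinyConfig, TinyForCausalLM]}

# Mirror of the loop in QEfficient/__init__.py: register the config class
# by model_type, then register the architecture class against the config.
for model_type, (config_cls, arch_cls) in MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS.items():
    CONFIG_REGISTRY[model_type] = config_cls
    MODEL_REGISTRY[config_cls] = arch_cls


def from_pretrained(model_type):
    """Resolve a model instance from its model_type, as AutoModel would."""
    config_cls = CONFIG_REGISTRY[model_type]
    return MODEL_REGISTRY[config_cls](config_cls())


model = from_pretrained("tiny_swiftkv")
print(type(model).__name__)  # -> TinyForCausalLM
```

Because the loop runs at package import time, any model listed in the dictionary is resolvable through the generic `from_pretrained` path without further wiring.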
29 changes: 29 additions & 0 deletions QEfficient/transformers/cache_utils.py
@@ -36,6 +36,35 @@ class QEffDynamicCache(DynamicCache):

"""

    def write_only(self, key_states, value_states, layer_idx, cache_kwargs):
        # Update the cache
        if len(self.key_cache) <= layer_idx:
            self.key_cache.append(key_states)
            self.value_cache.append(value_states)
        else:
            position_ids = cache_kwargs.get("position_ids")
            self.key_cache[layer_idx] = CtxScatterFunc.apply(self.key_cache[layer_idx], position_ids, key_states)
            self.value_cache[layer_idx] = CtxScatterFunc.apply(self.value_cache[layer_idx], position_ids, value_states)

    def read_only(self, layer_idx, **cache_kwargs):
        k_out, v_out = self.key_cache[layer_idx], self.value_cache[layer_idx]
        position_ids = cache_kwargs.get("position_ids")
        ctx_len = k_out.shape[2]
        ctx_indices = torch.arange(ctx_len)[None, None, ...]
        gather_limit = position_ids.max(1, keepdim=True).values.unsqueeze(1)
        invalid_mask = ctx_indices > gather_limit

        if torch.onnx.is_in_onnx_export():
            invalid_idx_value = torch.iinfo(torch.int32).max
        else:
            invalid_idx_value = 0

        ctx_indices = torch.where(invalid_mask, invalid_idx_value, ctx_indices)
        k_out = CtxGatherFunc.apply(k_out, ctx_indices)
        v_out = CtxGatherFunc.apply(v_out, ctx_indices)
        v_out = torch.where(invalid_mask.unsqueeze(-1), torch.tensor(0.0, dtype=torch.float32), v_out)
        return k_out, v_out

    def update(
        self,
        key_states: torch.Tensor,
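The masking trick in `read_only` keeps the gather shape-stable: positions past the largest valid `position_id` are redirected to index 0, gathered anyway, and then zeroed so they contribute nothing. A tensor-free sketch of the same idea, using a plain list in place of the cache tensor (all names here are illustrative, not from the PR):

```python
# Sketch of the masked-gather idea in read_only, with a Python list
# standing in for one cache row along the context dimension.

def masked_gather(cache, max_valid_pos):
    """Gather all ctx_len slots; slots past max_valid_pos read index 0 and are zeroed."""
    ctx_len = len(cache)
    out = []
    for idx in range(ctx_len):
        invalid = idx > max_valid_pos
        src = 0 if invalid else idx  # redirect invalid indices (the CtxGatherFunc step)
        val = cache[src]
        if invalid:
            val = 0.0                # zero out invalid slots (the torch.where step)
        out.append(val)
    return out


cache_row = [1.0, 2.0, 3.0, 4.0]
print(masked_gather(cache_row, 1))  # -> [1.0, 2.0, 0.0, 0.0]
```

The real code additionally swaps the redirect target to `int32` max during ONNX export, so the AI 100 compiler can recognize and clip the out-of-range gather.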
65 changes: 65 additions & 0 deletions QEfficient/transformers/modeling_utils.py
@@ -88,6 +88,12 @@

from QEfficient.customop import CustomRMSNormAIC

# Placeholder for all non-transformer models
from QEfficient.transformers.models.llama_swiftkv.modeling_llama_swiftkv import (
    LlamaSwiftKVConfig,
    LlamaSwiftKVForCausalLM,
)

from .models.codegen.modeling_codegen import (
    QEffCodeGenAttention,
    QeffCodeGenBlock,
@@ -271,6 +277,17 @@
    WhisperForConditionalGeneration: QEffWhisperForConditionalGeneration,
}

# Map of model type to its config class and model architecture class.
# When onboarding new models, make sure to add the new model card names to this dictionary.
# Developers are expected to follow naming conventions like ForCausalLM when defining class names.
MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS = {"llama_swiftkv": [LlamaSwiftKVConfig, LlamaSwiftKVForCausalLM]}

# Substrings identifying supported non-transformer model types, e.g. "swiftkv" from "llama_swiftkv"
LIST_OF_MODEL_TYPES = {"swiftkv"}

# Map from model-type substring to the marker inside the architecture class name,
# e.g. "SwiftKVFor" inside LlamaSwiftKVForCausalLM
MODEL_TYPE_TO_MODEL_CLASS_TYPE = {"swiftkv": "SwiftKVFor"}


def _prepare_cross_attention_mask(
    cross_attention_mask: torch.Tensor,
@@ -362,3 +379,51 @@ def _create_causal_mask(
    attention_mask = attention_mask.unsqueeze(1)

    return attention_mask


def convert_str_to_class(className):
    """
    Convert a class-name string into the corresponding class from transformers
    ---------
    :className: `str`- Class name string, e.g. "AutoModelForCausalLM".
    Return:
        The class object.
    """
    module = __import__("transformers")
    return getattr(module, className)


def get_auto_model_class(model_type, NonTransformerModelCls):
    """
    Resolve the transformers AutoModel class for a non-transformer model like SwiftKV
    ---------------------------------------
    : model_type: str: marker substring inside the class name, e.g. "SwiftKVFor"
    : NonTransformerModelCls: model class, e.g. LlamaSwiftKVForCausalLM
    """

    # Derive the AutoModel class name from the non-transformer class name,
    # e.g. LlamaSwiftKVForCausalLM -> AutoModelForCausalLM; this keeps the lookup generic.
    nonTransformerModelClsName = NonTransformerModelCls.__name__
    start_index = nonTransformerModelClsName.find(model_type)

    # Index just past model_type, e.g. past "SwiftKVFor"
    substring_start = start_index + len(model_type)

    # Substring after model_type, e.g. "CausalLM"
    nonTransformerModel = nonTransformerModelClsName[substring_start:]

    autoModelName = "AutoModelFor" + nonTransformerModel

    # Convert the string to the actual class
    autoModelClassName = convert_str_to_class(autoModelName)

    return autoModelClassName


def get_model_class_type_from_model_type(model_type):
    for substring in LIST_OF_MODEL_TYPES:
        if substring in model_type:
            model_class_type = substring
            break
    else:
        # Guard against an unbound model_class_type when no known substring matches
        raise ValueError(f"Unsupported model type: {model_type}")

    model_class_name = MODEL_TYPE_TO_MODEL_CLASS_TYPE[model_class_type]
    return model_class_name
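The two helpers above are pure string plumbing, and their effect is easiest to see composed end to end. A self-contained sketch of that pipeline (stopping at the derived name rather than calling `getattr` on transformers, so it needs no imports); only the names shown in the diff are used:

```python
# Worked sketch of the name derivation performed by
# get_model_class_type_from_model_type + get_auto_model_class.

LIST_OF_MODEL_TYPES = {"swiftkv"}
MODEL_TYPE_TO_MODEL_CLASS_TYPE = {"swiftkv": "SwiftKVFor"}


def derive_auto_model_name(model_type, arch_cls_name):
    # 1. Find which known substring the model_type contains ("swiftkv").
    matched = next(s for s in LIST_OF_MODEL_TYPES if s in model_type)
    # 2. Map it to the marker inside the class name ("SwiftKVFor").
    marker = MODEL_TYPE_TO_MODEL_CLASS_TYPE[matched]
    # 3. Take everything after the marker: "CausalLM".
    tail = arch_cls_name[arch_cls_name.find(marker) + len(marker):]
    # 4. Build the transformers auto-class name to resolve via getattr.
    return "AutoModelFor" + tail


name = derive_auto_model_name("llama_swiftkv", "LlamaSwiftKVForCausalLM")
print(name)  # -> AutoModelForCausalLM
```

This derivation is why the naming convention matters: the architecture class name must embed the marker (`SwiftKVFor`) followed by a suffix that transformers already exposes as an `AutoModelFor*` class.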
6 changes: 6 additions & 0 deletions QEfficient/transformers/models/llama_swiftkv/__init__.py
@@ -0,0 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------