This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit 91c973b

fix pylint

Signed-off-by: changwangss <[email protected]>
changwangss committed Jun 14, 2024
1 parent 17287f8

Showing 5 changed files with 306 additions and 564 deletions.
File 1 of 5 (path not shown):

@@ -699,7 +699,7 @@ def load_model(
 assert ipex.__version__ >= "2.1.0+cpu", "Please use Intel Extension for PyTorch >=2.1.0+cpu."
 if re.search("falcon", model_name, re.IGNORECASE):
     assert transformers.__version__ <= "4.33.3", "Please pip install transformers==4.33.3"
-from intel_extension_for_transformers.transformers.llm.evaluation.models import TSModelCausalLMForITREX
+from intel_extension_for_transformers.transformers.llm.quantization.sq_utils import TSModelCausalLMForITREX
 model = TSModelCausalLMForITREX.from_pretrained(
     model_name,
     file_name="best_model.pt"
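The import now comes from llm.quantization.sq_utils instead of llm.evaluation.models. A minimal loading-and-generation sketch against the new path (the checkpoint path is a placeholder, and "best_model.pt" is assumed to be the quantized TorchScript artifact saved alongside it):

from transformers import AutoTokenizer
from intel_extension_for_transformers.transformers.llm.quantization.sq_utils import (
    TSModelCausalLMForITREX,
)

model_name = "path/to/quantized_checkpoint"  # placeholder checkpoint directory
model = TSModelCausalLMForITREX.from_pretrained(model_name, file_name="best_model.pt")
tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = tokenizer("Hello,", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))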
File 2 of 5 (path not shown):

@@ -14,12 +14,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re

 from typing import Optional, Tuple
-
 import transformers
 from datasets import load_dataset
-from optimum.intel.generation.modeling import TSModelForCausalLM
 from torch.nn.functional import pad
 from torch.utils.data import DataLoader
 from transformers.modeling_outputs import CausalLMOutputWithPast
@@ -315,7 +314,7 @@ def collate_batch(batch):
         )
         return calib_dataloader

-
+from optimum.intel.generation.modeling import TSModelForCausalLM
 class TSModelCausalLMForITREX(TSModelForCausalLM):
     def _reorder_cache(
         self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
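The hunk above moves the optimum import next to its only consumer, the TSModelCausalLMForITREX class. The body of _reorder_cache is not shown in this diff; a typical implementation, following the standard transformers beam-search pattern (a sketch, not necessarily the ITREX code), reorders each layer's cached key/value tensors along the batch axis:

import torch
from typing import Tuple

def _reorder_cache(
    past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
) -> Tuple[Tuple[torch.Tensor]]:
    # Select the cache entries of the surviving beams so subsequent
    # decoding steps read the right past keys/values.
    return tuple(
        tuple(past.index_select(0, beam_idx) for past in layer_past)
        for layer_past in past_key_values
    )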
File 3 of 5 (path not shown):

@@ -57,9 +57,7 @@
 )

 if is_autoround_available():
-    from auto_round.export.export_to_itrex.model_wrapper import (
-        WeightOnlyLinear as auto_round_woqlinear,
-    ) # pylint: disable=E0401
+    from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear as auto_round_woqlinear # pylint: disable=E0401
     from neural_compressor.torch.algorithms.weight_only.autoround import get_dataloader as get_autoround_dataloader

 torch = LazyImport("torch")
@@ -299,10 +297,8 @@ def _replace_linear(
             use_optimum_format=use_optimum_format,
         )
     elif device == "xpu" or device == torch.device("xpu"):
-        from intel_extension_for_pytorch.nn.utils._quantize_convert import (
-            WeightOnlyQuantizedLinear as ipex_linear,
-        ) # pylint: disable=E0401
-
+        from intel_extension_for_pytorch.nn.utils._quantize_convert import \
+            WeightOnlyQuantizedLinear as ipex_linear # pylint: disable=E0401
         model._modules[name] = ipex_linear(
             in_features,
             out_features,
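The surrounding _replace_linear walks the module tree and swaps each eligible nn.Linear for a device-specific quantized layer, as the model._modules[name] = ipex_linear(...) assignment above shows. A minimal sketch of that substitution pattern, with a hypothetical factory standing in for the quantized-layer constructor:

import torch.nn as nn

def replace_linear(model: nn.Module, factory) -> None:
    # Recursively visit children; replace plain Linear layers in place.
    for name, module in model.named_children():
        if isinstance(module, nn.Linear):
            model._modules[name] = factory(module.in_features, module.out_features)
        else:
            replace_linear(module, factory)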
@@ -569,6 +565,8 @@ def convert_to_quantized_model(model, config, device="cpu"):
         )
         model = prepare(model, quant_config)
         model = convert(model)
+        # The QBits module doesn't match the HQQ algorithm.
+        return model
     elif config.quant_method.value == "awq":
         quant_config = AWQConfig(
             dtype=dtype,
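This hunk makes the HQQ branch return immediately after convert, since the QBits repacking applied to the other weight-only paths does not handle HQQ output modules. A minimal sketch of that branch's flow, assuming the neural-compressor 3.x torch API (the HQQConfig fields shown are illustrative, not the values used by this repo):

import torch
from neural_compressor.torch.quantization import HQQConfig, convert, prepare

def quantize_hqq(model: torch.nn.Module) -> torch.nn.Module:
    quant_config = HQQConfig(bits=4, group_size=64)  # illustrative settings
    model = prepare(model, quant_config)  # wrap modules for quantization
    model = convert(model)                # produce HQQ-quantized modules
    # Return directly: the QBits repacking used by other weight-only
    # paths is skipped because it doesn't support HQQ modules.
    return model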
(Diffs for the remaining changed files were not loaded.)