Skip to content

Commit

Permalink
clean tests
Browse files Browse the repository at this point in the history
  • Loading branch information
echarlaix committed Apr 12, 2024
1 parent 54ce18f commit 0c44e0b
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 20 deletions.
2 changes: 1 addition & 1 deletion tests/neural_compressor/test_ipex.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
40 changes: 22 additions & 18 deletions tests/neural_compressor/test_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
pipeline,
set_seed,
)
from utils_tests import SEED, INCTestMixin, _generate_dataset
from utils_tests import MODEL_NAMES, SEED, INCTestMixin, _generate_dataset
from optimum.intel.utils.import_utils import is_torch_version, is_intel_extension_for_transformers_available


Expand All @@ -71,22 +71,23 @@

class QuantizationTest(INCTestMixin):
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
("text-classification", "hf-internal-testing/tiny-random-BertForSequenceClassification", 21),
("text-generation", "hf-internal-testing/tiny-random-BloomForCausalLM", 21),
("text-classification", "bert", 21),
# ("text-generation", "bloom", 21),
)

SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
("fill-mask", "hf-internal-testing/tiny-random-BertForMaskedLM", 22),
("token-classification", "hf-internal-testing/tiny-random-AlbertForTokenClassification", 26),
("fill-mask", "bert", 22),
("token-classification", "albert", 26),
)

TEXT_GENERATION_SUPPORTED_ARCHITECTURES = (
"hf-internal-testing/tiny-random-BloomForCausalLM",
"hf-internal-testing/tiny-random-GPTNeoForCausalLM",
"bloom",
"gpt_neo",
)

@parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls):
def test_dynamic_quantization(self, task, model_arch, expected_quantized_matmuls):
model_name = MODEL_NAMES[model_arch]
quantization_config = PostTrainingQuantConfig(approach="dynamic")
model_class = ORT_SUPPORTED_TASKS[task]["class"][0]
tokenizer = AutoTokenizer.from_pretrained(model_name)
Expand Down Expand Up @@ -121,8 +122,9 @@ def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls
)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
def test_static_quantization(self, task, model_name, expected_quantized_matmuls):
def test_static_quantization(self, task, model_arch, expected_quantized_matmuls):
num_samples = 10
model_name = MODEL_NAMES[model_arch]
model_class = ORT_SUPPORTED_TASKS[task]["class"][0]
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
Expand Down Expand Up @@ -245,7 +247,8 @@ def test_dynamic_diffusion_model(self):
self.assertTrue(np.allclose(loaded_pipe_outputs, outputs, atol=1e-4))

@parameterized.expand(TEXT_GENERATION_SUPPORTED_ARCHITECTURES)
def test_quantize_text_generate_model(self, model_id):
def test_quantize_text_generate_model(self, model_arch):
model_id = MODEL_NAMES[model_arch]
set_seed(42)
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
Expand Down Expand Up @@ -274,13 +277,11 @@ def calibration_fn(p_model):


class TrainingOptimizationTest(INCTestMixin):
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
("text-classification", "hf-internal-testing/tiny-random-BertForSequenceClassification", 21),
("text-generation", "hf-internal-testing/tiny-random-BloomForCausalLM", 21),
)
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("text-classification", "bert", 21),)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
def test_aware_training_quantization(self, task, model_name, expected_quantized_matmuls):
def test_aware_training_quantization(self, task, model_arch, expected_quantized_matmuls):
model_name = MODEL_NAMES[model_arch]
quantization_config = QuantizationAwareTrainingConfig()
save_onnx_model = False

Expand All @@ -303,7 +304,8 @@ def test_aware_training_quantization(self, task, model_name, expected_quantized_
)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
def test_aware_training_quantization_pruning(self, task, model_name, expected_quantized_matmuls):
def test_aware_training_quantization_pruning(self, task, model_arch, expected_quantized_matmuls):
model_name = MODEL_NAMES[model_arch]
quantization_config = QuantizationAwareTrainingConfig()
target_sparsity = 0.9
pruning_config = WeightPruningConfig(
Expand Down Expand Up @@ -335,7 +337,8 @@ def test_aware_training_quantization_pruning(self, task, model_name, expected_qu
)

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
def test_magnitude_pruning(self, task, model_name, expected_quantized_matmuls):
def test_magnitude_pruning(self, task, model_arch, expected_quantized_matmuls):
model_name = MODEL_NAMES[model_arch]
target_sparsity = 0.9
# end_step should be training_args.num_train_epochs * (len(train_dataset) // training_args.per_device_train_batch_size)
pruning_config = WeightPruningConfig(
Expand Down Expand Up @@ -374,7 +377,8 @@ def test_magnitude_pruning(self, task, model_name, expected_quantized_matmuls):
self.assertEqual(inc_config.pruning["pattern"], "4x1")

@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
def test_distillation(self, task, model_name, expected_quantized_matmuls):
def test_distillation(self, task, model_arch, expected_quantized_matmuls):
model_name = MODEL_NAMES[model_arch]
teacher_model = ORT_SUPPORTED_TASKS[task]["class"][0].auto_model_class.from_pretrained(model_name)
distillation_config = DistillationConfig(teacher_model=teacher_model)
save_onnx_model = True
Expand Down
46 changes: 45 additions & 1 deletion tests/neural_compressor/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,50 @@
}


MODEL_NAMES = {
"albert": "hf-internal-testing/tiny-random-albert",
"beit": "hf-internal-testing/tiny-random-BeitForImageClassification",
"bert": "hf-internal-testing/tiny-random-bert",
"bart": "hf-internal-testing/tiny-random-bart",
"blenderbot-small": "hf-internal-testing/tiny-random-BlenderbotModel",
"blenderbot": "hf-internal-testing/tiny-random-BlenderbotModel",
"bloom": "hf-internal-testing/tiny-random-BloomModel",
"convbert": "hf-internal-testing/tiny-random-ConvBertForSequenceClassification",
"codegen": "hf-internal-testing/tiny-random-CodeGenForCausalLM",
"convnext": "hf-internal-testing/tiny-random-convnext",
"distilbert": "hf-internal-testing/tiny-random-distilbert",
"electra": "hf-internal-testing/tiny-random-electra",
"flaubert": "hf-internal-testing/tiny-random-flaubert",
"gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
"gpt2": "hf-internal-testing/tiny-random-gpt2",
"gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
"gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
"gptj": "hf-internal-testing/tiny-random-GPTJModel",
"levit": "hf-internal-testing/tiny-random-LevitModel",
"llama": "fxmarty/tiny-llama-fast-tokenizer",
"llama2": "Jiqing/tiny_random_llama2",
"marian": "sshleifer/tiny-marian-en-de",
"mbart": "hf-internal-testing/tiny-random-mbart",
"mistral": "echarlaix/tiny-random-mistral",
"mobilenet_v1": "google/mobilenet_v1_0.75_192",
"mobilenet_v2": "hf-internal-testing/tiny-random-MobileNetV2Model",
"mobilevit": "hf-internal-testing/tiny-random-mobilevit",
"mpt": "hf-internal-testing/tiny-random-MptForCausalLM",
"mt5": "stas/mt5-tiny-random",
"opt": "hf-internal-testing/tiny-random-OPTModel",
"phi": "echarlaix/tiny-random-PhiForCausalLM",
"resnet": "hf-internal-testing/tiny-random-resnet",
"roberta": "hf-internal-testing/tiny-random-roberta",
"roformer": "hf-internal-testing/tiny-random-roformer",
"squeezebert": "hf-internal-testing/tiny-random-squeezebert",
"t5": "hf-internal-testing/tiny-random-t5",
"unispeech": "hf-internal-testing/tiny-random-unispeech",
"vit": "hf-internal-testing/tiny-random-vit",
"wav2vec2": "anton-l/wav2vec2-random-tiny-classifier",
"xlm": "hf-internal-testing/tiny-random-xlm",
}


def num_quantized_matmul_onnx_model(onnx_model):
num_quantized_matmul = 0
for node in onnx_model.graph.node:
Expand Down Expand Up @@ -145,7 +189,7 @@ def check_model_outputs(
ort_model = ORT_SUPPORTED_TASKS[task]["class"][0].from_pretrained(save_directory, **model_kwargs)
ort_outputs = ort_model(**tokens)
self.assertTrue("logits" in ort_outputs)
if task != "fill-mask":
if task not in {"fill-mask", "text-generation"}:
self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-2))

@staticmethod
Expand Down

0 comments on commit 0c44e0b

Please sign in to comment.