From 0c44e0bdc34c64b9713de64c1b4d3695a7a5889d Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 12 Apr 2024 18:28:18 +0200
Subject: [PATCH] clean tests

---
 tests/neural_compressor/test_ipex.py         |  2 +-
 tests/neural_compressor/test_optimization.py | 40 +++++++++--------
 tests/neural_compressor/utils_tests.py       | 46 +++++++++++++++++++-
 3 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/tests/neural_compressor/test_ipex.py b/tests/neural_compressor/test_ipex.py
index 27f4da30a1..1e302ac7f6 100644
--- a/tests/neural_compressor/test_ipex.py
+++ b/tests/neural_compressor/test_ipex.py
@@ -1,4 +1,4 @@
-# Copyright 2023 The HuggingFace Team. All rights reserved.
+# Copyright 2024 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/neural_compressor/test_optimization.py b/tests/neural_compressor/test_optimization.py
index 1426bbf0e7..289972b374 100644
--- a/tests/neural_compressor/test_optimization.py
+++ b/tests/neural_compressor/test_optimization.py
@@ -44,7 +44,7 @@
     pipeline,
     set_seed,
 )
-from utils_tests import SEED, INCTestMixin, _generate_dataset
+from utils_tests import MODEL_NAMES, SEED, INCTestMixin, _generate_dataset
 
 
 from optimum.intel.utils.import_utils import is_torch_version, is_intel_extension_for_transformers_available
@@ -71,22 +71,23 @@
 
 class QuantizationTest(INCTestMixin):
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
-        ("text-classification", "hf-internal-testing/tiny-random-BertForSequenceClassification", 21),
-        ("text-generation", "hf-internal-testing/tiny-random-BloomForCausalLM", 21),
+        ("text-classification", "bert", 21),
+        # ("text-generation", "bloom", 21),
     )
 
     SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
-        ("fill-mask", "hf-internal-testing/tiny-random-BertForMaskedLM", 22),
-        ("token-classification", "hf-internal-testing/tiny-random-AlbertForTokenClassification", 26),
+        ("fill-mask", "bert", 22),
+        ("token-classification", "albert", 26),
     )
 
     TEXT_GENERATION_SUPPORTED_ARCHITECTURES = (
-        "hf-internal-testing/tiny-random-BloomForCausalLM",
-        "hf-internal-testing/tiny-random-GPTNeoForCausalLM",
+        "bloom",
+        "gpt_neo",
     )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
-    def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls):
+    def test_dynamic_quantization(self, task, model_arch, expected_quantized_matmuls):
+        model_name = MODEL_NAMES[model_arch]
         quantization_config = PostTrainingQuantConfig(approach="dynamic")
         model_class = ORT_SUPPORTED_TASKS[task]["class"][0]
         tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -121,8 +122,9 @@ def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls
         )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
-    def test_static_quantization(self, task, model_name, expected_quantized_matmuls):
+    def test_static_quantization(self, task, model_arch, expected_quantized_matmuls):
         num_samples = 10
+        model_name = MODEL_NAMES[model_arch]
         model_class = ORT_SUPPORTED_TASKS[task]["class"][0]
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         if tokenizer.pad_token is None:
@@ -245,7 +247,8 @@ def test_dynamic_diffusion_model(self):
         self.assertTrue(np.allclose(loaded_pipe_outputs, outputs, atol=1e-4))
 
     @parameterized.expand(TEXT_GENERATION_SUPPORTED_ARCHITECTURES)
-    def test_quantize_text_generate_model(self, model_id):
+    def test_quantize_text_generate_model(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
         set_seed(42)
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -274,13 +277,11 @@ def calibration_fn(p_model):
 
 
 class TrainingOptimizationTest(INCTestMixin):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
-        ("text-classification", "hf-internal-testing/tiny-random-BertForSequenceClassification", 21),
-        ("text-generation", "hf-internal-testing/tiny-random-BloomForCausalLM", 21),
-    )
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("text-classification", "bert", 21),)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
-    def test_aware_training_quantization(self, task, model_name, expected_quantized_matmuls):
+    def test_aware_training_quantization(self, task, model_arch, expected_quantized_matmuls):
+        model_name = MODEL_NAMES[model_arch]
         quantization_config = QuantizationAwareTrainingConfig()
         save_onnx_model = False
 
@@ -303,7 +304,8 @@ def test_aware_training_quantization(self, task, model_name, expected_quantized_
         )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
-    def test_aware_training_quantization_pruning(self, task, model_name, expected_quantized_matmuls):
+    def test_aware_training_quantization_pruning(self, task, model_arch, expected_quantized_matmuls):
+        model_name = MODEL_NAMES[model_arch]
         quantization_config = QuantizationAwareTrainingConfig()
         target_sparsity = 0.9
         pruning_config = WeightPruningConfig(
@@ -335,7 +337,8 @@ def test_aware_training_quantization_pruning(self, task, model_name, expected_qu
         )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
-    def test_magnitude_pruning(self, task, model_name, expected_quantized_matmuls):
+    def test_magnitude_pruning(self, task, model_arch, expected_quantized_matmuls):
+        model_name = MODEL_NAMES[model_arch]
         target_sparsity = 0.9
         # end_step should be training_args.num_train_epochs * (len(train_dataset) // training_args.per_device_train_batch_size)
         pruning_config = WeightPruningConfig(
@@ -374,7 +377,8 @@ def test_magnitude_pruning(self, task, model_name, expected_quantized_matmuls):
         self.assertEqual(inc_config.pruning["pattern"], "4x1")
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
-    def test_distillation(self, task, model_name, expected_quantized_matmuls):
+    def test_distillation(self, task, model_arch, expected_quantized_matmuls):
+        model_name = MODEL_NAMES[model_arch]
         teacher_model = ORT_SUPPORTED_TASKS[task]["class"][0].auto_model_class.from_pretrained(model_name)
         distillation_config = DistillationConfig(teacher_model=teacher_model)
         save_onnx_model = True
diff --git a/tests/neural_compressor/utils_tests.py b/tests/neural_compressor/utils_tests.py
index 214aa73be5..fb8f5794ea 100644
--- a/tests/neural_compressor/utils_tests.py
+++ b/tests/neural_compressor/utils_tests.py
@@ -65,6 +65,50 @@
 }
 
 
+MODEL_NAMES = {
+    "albert": "hf-internal-testing/tiny-random-albert",
+    "beit": "hf-internal-testing/tiny-random-BeitForImageClassification",
+    "bert": "hf-internal-testing/tiny-random-bert",
+    "bart": "hf-internal-testing/tiny-random-bart",
+    "blenderbot-small": "hf-internal-testing/tiny-random-BlenderbotModel",
+    "blenderbot": "hf-internal-testing/tiny-random-BlenderbotModel",
+    "bloom": "hf-internal-testing/tiny-random-BloomModel",
+    "convbert": "hf-internal-testing/tiny-random-ConvBertForSequenceClassification",
+    "codegen": "hf-internal-testing/tiny-random-CodeGenForCausalLM",
+    "convnext": "hf-internal-testing/tiny-random-convnext",
+    "distilbert": "hf-internal-testing/tiny-random-distilbert",
+    "electra": "hf-internal-testing/tiny-random-electra",
+    "flaubert": "hf-internal-testing/tiny-random-flaubert",
+    "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
+    "gpt2": "hf-internal-testing/tiny-random-gpt2",
+    "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
+    "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
+    "gptj": "hf-internal-testing/tiny-random-GPTJModel",
+    "levit": "hf-internal-testing/tiny-random-LevitModel",
+    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama2": "Jiqing/tiny_random_llama2",
+    "marian": "sshleifer/tiny-marian-en-de",
+    "mbart": "hf-internal-testing/tiny-random-mbart",
+    "mistral": "echarlaix/tiny-random-mistral",
+    "mobilenet_v1": "google/mobilenet_v1_0.75_192",
+    "mobilenet_v2": "hf-internal-testing/tiny-random-MobileNetV2Model",
+    "mobilevit": "hf-internal-testing/tiny-random-mobilevit",
+    "mpt": "hf-internal-testing/tiny-random-MptForCausalLM",
+    "mt5": "stas/mt5-tiny-random",
+    "opt": "hf-internal-testing/tiny-random-OPTModel",
+    "phi": "echarlaix/tiny-random-PhiForCausalLM",
+    "resnet": "hf-internal-testing/tiny-random-resnet",
+    "roberta": "hf-internal-testing/tiny-random-roberta",
+    "roformer": "hf-internal-testing/tiny-random-roformer",
+    "squeezebert": "hf-internal-testing/tiny-random-squeezebert",
+    "t5": "hf-internal-testing/tiny-random-t5",
+    "unispeech": "hf-internal-testing/tiny-random-unispeech",
+    "vit": "hf-internal-testing/tiny-random-vit",
+    "wav2vec2": "anton-l/wav2vec2-random-tiny-classifier",
+    "xlm": "hf-internal-testing/tiny-random-xlm",
+}
+
+
 def num_quantized_matmul_onnx_model(onnx_model):
     num_quantized_matmul = 0
     for node in onnx_model.graph.node:
@@ -145,7 +189,7 @@ def check_model_outputs(
             ort_model = ORT_SUPPORTED_TASKS[task]["class"][0].from_pretrained(save_directory, **model_kwargs)
             ort_outputs = ort_model(**tokens)
             self.assertTrue("logits" in ort_outputs)
-            if task != "fill-mask":
+            if task not in {"fill-mask", "text-generation"}:
                 self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-2))
 
     @staticmethod