From f7141a6d8792f1116ce2178884fb470b00b02846 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Tue, 26 Sep 2023 18:58:02 +0200
Subject: [PATCH] fix test

---
 tests/neural_compressor/test_optimization.py |  7 ++++---
 tests/neural_compressor/utils_tests.py       | 10 ++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/neural_compressor/test_optimization.py b/tests/neural_compressor/test_optimization.py
index 21cdeabfaa..a5f39460fd 100644
--- a/tests/neural_compressor/test_optimization.py
+++ b/tests/neural_compressor/test_optimization.py
@@ -68,11 +68,11 @@
 class OptimizationTest(INCTestMixin):
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
         ("text-classification", "hf-internal-testing/tiny-random-BertForSequenceClassification", 21),
-        # ("text-generation", "hf-internal-testing/tiny-random-BloomForCausalLM", 1),  # TODO : enable causal lm task once INC ONNX export fixed
+        # ("text-generation", "hf-internal-testing/tiny-random-BloomForCausalLM", 21),  # TODO : enable causal lm task once INC ONNX export fixed
     )
 
     SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
-        ("fill-mask", "hf-internal-testing/tiny-random-DistilBertForMaskedLM", 22),
+        ("fill-mask", "hf-internal-testing/tiny-random-BertForMaskedLM", 22),
         ("token-classification", "hf-internal-testing/tiny-random-AlbertForTokenClassification", 26),
     )
 
@@ -88,12 +88,13 @@ def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         save_onnx_model = False
         quantized_model = None
+        model_kwargs = {"use_cache": False, "use_io_binding": False} if task == "text-generation" else {}
         with tempfile.TemporaryDirectory() as tmp_dir:
             for backend in ["torch", "ort"]:
                 if backend == "torch":
                     model = model_class.auto_model_class.from_pretrained(model_name)
                 else:
-                    model = model_class.from_pretrained(model_name, export=True)
+                    model = model_class.from_pretrained(model_name, export=True, **model_kwargs)
 
                 quantizer = INCQuantizer.from_pretrained(model, task=task)
                 quantizer.quantize(
diff --git a/tests/neural_compressor/utils_tests.py b/tests/neural_compressor/utils_tests.py
index 521ea71e79..c1f0086e53 100644
--- a/tests/neural_compressor/utils_tests.py
+++ b/tests/neural_compressor/utils_tests.py
@@ -93,12 +93,13 @@ def check_model_outputs(
         load_onnx_model=True,
         load_inc_model=True,
         num_samples=None,
-        file_name=ONNX_WEIGHTS_NAME,
+        file_name=None,
     ):
         tokens = tokenizer("This is a sample input", return_tensors="pt")
+        file_name = ONNX_WEIGHTS_NAME if task != "text-generation" else "decoder_model.onnx"
 
         model_kwargs = (
-            {"decoder_file_name": file_name, "use_cache": False}
+            {"decoder_file_name": file_name, "use_cache": False, "use_io_binding": False}
             if task == "text-generation"
             else {"file_name": file_name}
         )
@@ -113,7 +114,7 @@
         if load_inc_model:
             inc_model = eval(_HEAD_TO_AUTOMODELS[task]).from_pretrained(save_directory)
             inc_model_outputs = inc_model(**tokens)
-            self.assertTrue(torch.equal(outputs, inc_model_outputs["logits"]))
+            self.assertTrue(torch.allclose(inc_model_outputs["logits"], outputs, atol=1e-3))
             # self.assertEqual(inc_config.save_onnx_model, load_onnx_model)
 
         if load_onnx_model:
@@ -127,7 +128,8 @@
             ort_model = ORT_SUPPORTED_TASKS[task]["class"][0].from_pretrained(save_directory, **model_kwargs)
             ort_outputs = ort_model(**tokens)
             self.assertTrue("logits" in ort_outputs)
-            # self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-3))
+            if task != "fill-mask":
+                self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-3))
 
     @staticmethod
     def get_trainer(