Commit

add test
echarlaix committed Sep 26, 2023
1 parent 355e00f commit 907871f
Showing 2 changed files with 61 additions and 34 deletions.
69 changes: 46 additions & 23 deletions tests/neural_compressor/test_optimization.py
@@ -67,13 +67,13 @@

 class OptimizationTest(INCTestMixin):
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
-        ("text-classification", "hf-internal-testing/tiny-random-bert", 34),
+        ("text-classification", "hf-internal-testing/tiny-random-BertForSequenceClassification", 21),
         # ("text-generation", "hf-internal-testing/tiny-random-BloomForCausalLM", 1), # TODO : enable causal lm task once INC ONNX export fixed
     )

     SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
-        ("fill-mask", "hf-internal-testing/tiny-random-DistilBertForMaskedLM", 34),
-        ("token-classification", "hf-internal-testing/tiny-random-AlbertForTokenClassification", 34),
+        ("fill-mask", "hf-internal-testing/tiny-random-DistilBertForMaskedLM", 22),
+        ("token-classification", "hf-internal-testing/tiny-random-AlbertForTokenClassification", 26),
     )

     TEXT_GENERATION_SUPPORTED_ARCHITECTURES = (
@@ -84,58 +84,81 @@ class OptimizationTest(INCTestMixin):
     @parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
     def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls):
         quantization_config = PostTrainingQuantConfig(approach="dynamic")
-        model = ORT_SUPPORTED_TASKS[task]["class"][0].auto_model_class.from_pretrained(model_name)
+        model_class = ORT_SUPPORTED_TASKS[task]["class"][0]
         tokenizer = AutoTokenizer.from_pretrained(model_name)
-        quantizer = INCQuantizer.from_pretrained(model, task=task)
         save_onnx_model = False
+        quantized_model = None
         with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer.quantize(
-                quantization_config=quantization_config,
-                save_directory=tmp_dir,
-                save_onnx_model=save_onnx_model,
-            )
+            for backend in ["torch", "ort"]:
+                if backend == "torch":
+                    model = model_class.auto_model_class.from_pretrained(model_name)
+                else:
+                    model = model_class.from_pretrained(model_name, export=True)
+
+                quantizer = INCQuantizer.from_pretrained(model, task=task)
+                quantizer.quantize(
+                    quantization_config=quantization_config,
+                    save_directory=tmp_dir,
+                    save_onnx_model=save_onnx_model,
+                )
+                if backend == "torch":
+                    quantized_model = quantizer._quantized_model
+
             self.check_model_outputs(
-                q_model=quantizer._quantized_model,
+                q_model=quantized_model,
                 task=task,
                 tokenizer=tokenizer,
                 save_directory=tmp_dir,
                 expected_quantized_matmuls=expected_quantized_matmuls,
                 is_static=False,
-                load_onnx_model=save_onnx_model,
+                load_onnx_model=True,
+                load_inc_model=True,
             )

     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
     def test_static_quantization(self, task, model_name, expected_quantized_matmuls):
         num_samples = 10
-        model = ORT_SUPPORTED_TASKS[task]["class"][0].auto_model_class.from_pretrained(model_name)
+        model_class = ORT_SUPPORTED_TASKS[task]["class"][0]
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
-        quantizer = INCQuantizer.from_pretrained(model, task=task)
-        calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=num_samples)
+
         save_onnx_model = False
         op_type_dict = (
             {"Embedding": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}}
             if save_onnx_model
             else None
         )
         quantization_config = PostTrainingQuantConfig(approach="static", op_type_dict=op_type_dict)
+        quantized_model = None
+
         with tempfile.TemporaryDirectory() as tmp_dir:
-            quantizer.quantize(
-                quantization_config=quantization_config,
-                calibration_dataset=calibration_dataset,
-                save_directory=tmp_dir,
-                save_onnx_model=save_onnx_model,
-            )
+            for backend in ["torch", "ort"]:
+                if backend == "torch":
+                    model = model_class.auto_model_class.from_pretrained(model_name)
+                else:
+                    model = model_class.from_pretrained(model_name, export=True)
+                quantizer = INCQuantizer.from_pretrained(model, task=task)
+                calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=num_samples)
+                quantizer.quantize(
+                    quantization_config=quantization_config,
+                    calibration_dataset=calibration_dataset,
+                    save_directory=tmp_dir,
+                    save_onnx_model=save_onnx_model,
+                )
+                if backend == "torch":
+                    quantized_model = quantizer._quantized_model
+
             self.check_model_outputs(
-                q_model=quantizer._quantized_model,
+                q_model=quantized_model,
                 task=task,
                 tokenizer=tokenizer,
                 save_directory=tmp_dir,
                 expected_quantized_matmuls=expected_quantized_matmuls,
                 is_static=True,
+                load_onnx_model=True,
+                load_inc_model=True,
                 num_samples=num_samples,
-                load_onnx_model=save_onnx_model,
             )

     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
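
For reference, the torch/ort backend loop exercised by the updated tests above can be reproduced outside the test suite roughly as follows. This is a minimal sketch with a placeholder checkpoint and task; INCQuantizer, PostTrainingQuantConfig and the ORT model class are used as they appear in the diff, not as a definitive recipe.

# Minimal sketch of the torch/ort backend loop used in the updated tests.
# Assumes optimum-intel and optimum[onnxruntime] are installed; the model name is a placeholder.
import tempfile

from neural_compressor.config import PostTrainingQuantConfig
from optimum.intel import INCQuantizer
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoModelForSequenceClassification

model_name = "hf-internal-testing/tiny-random-BertForSequenceClassification"
quantization_config = PostTrainingQuantConfig(approach="dynamic")

with tempfile.TemporaryDirectory() as tmp_dir:
    for backend in ["torch", "ort"]:
        if backend == "torch":
            # Plain PyTorch model for the torch backend
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
        else:
            # ONNX Runtime model, exported on the fly, for the ort backend
            model = ORTModelForSequenceClassification.from_pretrained(model_name, export=True)

        quantizer = INCQuantizer.from_pretrained(model, task="text-classification")
        quantizer.quantize(
            quantization_config=quantization_config,
            save_directory=tmp_dir,
            save_onnx_model=False,
        )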
26 changes: 15 additions & 11 deletions tests/neural_compressor/utils_tests.py
@@ -55,9 +55,10 @@
 def num_quantized_matmul_onnx_model(onnx_model):
     num_quantized_matmul = 0
     for node in onnx_model.graph.node:
-        if "quantizelinear" == node.op_type.lower():
+        if "QuantizeLinear" in node.name:
             num_quantized_matmul += 1
-    return num_quantized_matmul // 2
+
+    return num_quantized_matmul


 def _preprocess_function(examples, tokenizer, column_name):
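
As a rough illustration of what the revised helper above measures, the sketch below loads a quantized ONNX file (the path is a placeholder) and applies both counting strategies side by side; only graph iteration from the onnx package is assumed.

# Sketch: compare the two counting strategies on a quantized ONNX export.
# "model_quantized.onnx" is a placeholder path.
from onnx import load as onnx_load

onnx_model = onnx_load("model_quantized.onnx")

# Previous behaviour: count QuantizeLinear ops and halve the result.
by_op_type = sum(1 for node in onnx_model.graph.node if node.op_type.lower() == "quantizelinear") // 2
# New behaviour: count nodes whose name contains "QuantizeLinear".
by_name = sum(1 for node in onnx_model.graph.node if "QuantizeLinear" in node.name)

print(f"op_type-based count: {by_op_type}, name-based count: {by_name}")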
@@ -90,22 +91,31 @@ def check_model_outputs(
         expected_quantized_matmuls,
         is_static=True,
         load_onnx_model=True,
+        load_inc_model=True,
         num_samples=None,
         file_name=ONNX_WEIGHTS_NAME,
     ):
         tokens = tokenizer("This is a sample input", return_tensors="pt")
-        inc_model = eval(_HEAD_TO_AUTOMODELS[task]).from_pretrained(save_directory)
+
         model_kwargs = (
             {"decoder_file_name": file_name, "use_cache": False}
             if task == "text-generation"
             else {"file_name": file_name}
         )
         inc_config = INCConfig.from_pretrained(save_directory)
-        self.assertEqual(inc_config.save_onnx_model, load_onnx_model)
+
         if num_samples is not None:
             self.assertEqual(inc_config.quantization["dataset_num_samples"], num_samples)

+        with torch.no_grad():
+            model_outputs = q_model(**tokens)
+            outputs = model_outputs["logits"] if isinstance(model_outputs, dict) else model_outputs[0]
+        if load_inc_model:
+            inc_model = eval(_HEAD_TO_AUTOMODELS[task]).from_pretrained(save_directory)
+            inc_model_outputs = inc_model(**tokens)
+            self.assertTrue(torch.equal(outputs, inc_model_outputs["logits"]))
+        # self.assertEqual(inc_config.save_onnx_model, load_onnx_model)

         if load_onnx_model:
             onnx_model = onnx_load(os.path.join(save_directory, file_name))
             num_quantized_matmul = num_quantized_matmul_onnx_model(onnx_model)
@@ -117,13 +127,7 @@ def check_model_outputs(
             ort_model = ORT_SUPPORTED_TASKS[task]["class"][0].from_pretrained(save_directory, **model_kwargs)
             ort_outputs = ort_model(**tokens)
             self.assertTrue("logits" in ort_outputs)
-
-        with torch.no_grad():
-            model_outputs = q_model(**tokens)
-            inc_model_outputs = inc_model(**tokens)
-            outputs = model_outputs["logits"] if isinstance(model_outputs, dict) else model_outputs[0]
-            self.assertTrue(torch.equal(outputs, inc_model_outputs["logits"]))
-            # self.assertTrue(torch.allclose(ort_outputs.logits, inc_model_outputs.logits, atol=1e-4))
+            # self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-3))

     @staticmethod
     def get_trainer(
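
To make the flow that check_model_outputs verifies easier to follow, here is a hedged sketch that reloads a quantized text-classification model from its save directory and compares the INC and ONNX Runtime outputs. The paths are placeholders, the ONNX file is assumed to use the default name, and INCModelForSequenceClassification is assumed to be the auto-class that _HEAD_TO_AUTOMODELS resolves to for this task.

# Sketch: reload the saved artifacts and compare logits, mirroring check_model_outputs.
# Assumes "quantized_model_dir" was produced by INCQuantizer.quantize with save_onnx_model=True,
# so both the PyTorch and ONNX artifacts exist in that directory.
import torch
from optimum.intel import INCModelForSequenceClassification
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer

model_name = "hf-internal-testing/tiny-random-BertForSequenceClassification"  # original checkpoint
quantized_model_dir = "path/to/quantized_model"  # placeholder save_directory

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokens = tokenizer("This is a sample input", return_tensors="pt")

inc_model = INCModelForSequenceClassification.from_pretrained(quantized_model_dir)
ort_model = ORTModelForSequenceClassification.from_pretrained(quantized_model_dir)

with torch.no_grad():
    inc_logits = inc_model(**tokens)["logits"]
ort_logits = ort_model(**tokens).logits

# The test asserts exact equality against the in-memory quantized model and keeps only a
# loose, commented-out tolerance check against the ONNX Runtime outputs.
print(torch.allclose(inc_logits, ort_logits, atol=1e-3))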