This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

improve deployment examples
Signed-off-by: changwangss <[email protected]>
changwangss committed Jun 14, 2024
1 parent 9d7b0ee commit 806fa0b
Showing 41 changed files with 502 additions and 432 deletions.
@@ -16,7 +16,7 @@ function init_params {
model_name_or_path="bigcode/starcoder"
extra_cmd=""
batch_size=8
approach="PostTrainingStatic"
approach="static"
alpha=0.5
script="run_generation.py"
for var in "$@"
@@ -583,29 +583,23 @@ def compute_metrics(eval_preds):
greater_is_better=False
)
trainer.metrics = tune_metric
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
if optim_args.quantization_approach != "qat":
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = PostTrainingQuantConfig(
approach=optim_args.quantization_approach,
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
else:
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = QuantizationAwareTrainingConfig(
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
)
early_stopping_patience = 2
early_stopping_threshold = 0.001 # optional
trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience, \
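The same restructuring recurs in the language-modeling hunks that follow: the tuning and accuracy criteria are built once, and the lowercase approach string then selects between the post-training and aware-training configs from neural_compressor. A minimal sketch of that flow, with placeholder values standing in for the fields the scripts read from `OptimizationArguments`:

```python
from neural_compressor.config import (
    AccuracyCriterion,
    PostTrainingQuantConfig,
    QuantizationAwareTrainingConfig,
    TuningCriterion,
)

# Placeholder values; the example scripts take these from OptimizationArguments.
quantization_approach = "static"   # "static", "dynamic", or "qat"
is_relative = True
perf_tol = 0.01

tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
    higher_is_better=False,
    criterion="relative" if is_relative else "absolute",
    tolerable_loss=perf_tol,
)

if quantization_approach != "qat":
    # Post-training static or dynamic quantization.
    quantization_config = PostTrainingQuantConfig(
        approach=quantization_approach,
        tuning_criterion=tuning_criterion,
        accuracy_criterion=accuracy_criterion,
    )
else:
    # Quantization-aware training takes no approach argument.
    quantization_config = QuantizationAwareTrainingConfig(
        tuning_criterion=tuning_criterion,
        accuracy_criterion=accuracy_criterion,
    )
```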
@@ -602,29 +602,24 @@ def compute_metrics(eval_preds):
criterion=optim_args.perf_tol,
greater_is_better=False
)
trainer.metrics = tune_metric
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
if optim_args.quantization_approach != "qat":
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = PostTrainingQuantConfig(
approach=optim_args.quantization_approach,
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
else:
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = QuantizationAwareTrainingConfig(
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
)
early_stopping_patience = 2
early_stopping_threshold = 0.001 # optional
trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience, \
@@ -549,29 +549,23 @@ def group_texts(examples):
greater_is_better=False
)
trainer.metrics = tune_metric
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
if optim_args.quantization_approach != "qat":
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = PostTrainingQuantConfig(
approach=optim_args.quantization_approach,
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
else:
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = QuantizationAwareTrainingConfig(
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
)
early_stopping_patience = 2
early_stopping_threshold = 0.001 # optional
trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience, \
2 changes: 1 addition & 1 deletion examples/huggingface/pytorch/optimization_README.md
@@ -4,7 +4,7 @@ Welcome to Pytorch Huggingface examples. The examples is following from [Hugging

## Quantization approach

| Task | PostTrainingDynamic | PostTrainingStatic | QuantizationAwareTraining
| Task | dynamic | static | qat
|---|:---:|:---:|:---:|
|**`language-modeling`**| ✅ | ✅ | ✅
|**`multi-choice`**| ✅ | ✅ | ✅
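The renamed columns mirror the approach strings now accepted by the example scripts. If older launch scripts or configs still carry the long names, a small (hypothetical) mapping is enough to translate them; only the three names below are involved, and the new spellings pass through unchanged:

```python
# Hypothetical helper for migrating older example configs to the lowercase
# approach names used throughout this commit.
APPROACH_RENAMES = {
    "PostTrainingStatic": "static",
    "PostTrainingDynamic": "dynamic",
    "QuantizationAwareTraining": "qat",
}

def migrate_approach(name: str) -> str:
    """Map an old approach name to its new spelling; new names pass through."""
    return APPROACH_RENAMES.get(name, name)

assert migrate_approach("PostTrainingStatic") == "static"
assert migrate_approach("qat") == "qat"
```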
@@ -123,7 +123,7 @@ else
echo "========== Prepare Model ${MODEL_NAME_OR_PATH} with Precision ${PRECISION} ========"
mode_cmd=""
if [[ ${PRECISION} = 'int8' ]]; then
mode_cmd=$mode_cmd" --tune --quantization_approach PostTrainingStatic"
mode_cmd=$mode_cmd" --tune --quantization_approach static"
elif [[ ${PRECISION} = 'bf16' ]]; then
mode_cmd=$mode_cmd" --enable_bf16"
fi
@@ -26,7 +26,13 @@
import transformers
from dataclasses import dataclass, field
from datasets import load_dataset, load_metric
from intel_extension_for_transformers.transformers import metrics , OptimizedModel, QuantizationConfig
from intel_extension_for_transformers.transformers import metrics , OptimizedModel
from neural_compressor.config import (
PostTrainingQuantConfig,
QuantizationAwareTrainingConfig,
TuningCriterion,
AccuracyCriterion
)
from trainer_qa import QuestionAnsweringTrainer
from transformers import (
AutoConfig,
@@ -211,9 +217,9 @@ class OptimizationArguments:
metadata={"help": "Whether or not to apply quantization."},
)
quantization_approach: Optional[str] = field(
default="PostTrainingStatic",
metadata={"help": "Quantization approach. Supported approach are PostTrainingStatic, "
"PostTrainingDynamic and QuantizationAwareTraining."},
default="static",
metadata={"help": "Quantization approach. Supported approach are static, "
"dynamic and qat."},
)
metric_name: Optional[str] = field(
default="eval_f1",
@@ -646,25 +652,43 @@ def compute_metrics(p: EvalPrediction):
if not training_args.do_eval:
raise ValueError("do_eval must be set to True for quantization.")

if optim_args.quantization_approach != "PostTrainingDynamic":
if optim_args.quantization_approach != "dynamic":
if not training_args.do_train:
raise ValueError(
"do_train must be set to True for static and aware training quantization."
)
if optim_args.quantization_approach == "QuantizationAwareTraining":
early_stopping_patience = 6
early_stopping_threshold = 0.001 # optional
trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience,
early_stopping_threshold))

tune_metric = metrics.Metric(
name=metric_name, is_relative=optim_args.is_relative, criterion=optim_args.perf_tol
)
quantization_config = QuantizationConfig(
approach=optim_args.quantization_approach,
max_trials=200,
metrics=[tune_metric],
)
trainer.metrics = tune_metric
if optim_args.quantization_approach != "qat":
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = PostTrainingQuantConfig(
approach=optim_args.quantization_approach,
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
else:
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(
higher_is_better=False, # optional.
criterion="relative" if optim_args.is_relative else "absolute", # optional. Available values are "relative" and "absolute".
tolerable_loss=optim_args.perf_tol, # optional.
)
quantization_config = QuantizationAwareTrainingConfig(
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion
)
early_stopping_patience = 2
early_stopping_threshold = 0.001 # optional
trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience, \
early_stopping_threshold))
model = trainer.quantize(quant_config=quantization_config)

if optim_args.benchmark or optim_args.accuracy_only:
@@ -674,7 +698,7 @@ def compute_metrics(p: EvalPrediction):
max_eval_samples = data_args.max_eval_samples \
if data_args.max_eval_samples is not None else len(eval_dataset)
eval_samples = min(max_eval_samples, len(eval_dataset))
samples = eval_samples - (eval_samples % batch_size) \
samples = eval_samples - (eval_samples % training_args.per_device_eval_batch_size) \
if training_args.dataloader_drop_last else eval_samples
logger.info("metrics keys: {}".format(results.keys()))
bert_task_acc_keys = ['eval_f1', 'eval_accuracy', 'eval_matthews_correlation',
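On the trainer side, the question-answering example keeps the Metric-based tuning target, the early-stopping callback, and the final `trainer.quantize` call. A condensed sketch of that sequence, assuming `trainer` is the example's QuestionAnsweringTrainer and `quantization_config` was built as in the earlier snippet (the metric settings are placeholders for the values taken from `OptimizationArguments`):

```python
import transformers
from intel_extension_for_transformers.transformers import metrics

# Placeholders for values the script reads from its arguments.
metric_name = "eval_f1"
is_relative = True
perf_tol = 0.01

tune_metric = metrics.Metric(
    name=metric_name, is_relative=is_relative, criterion=perf_tol
)
# `trainer` is assumed to be the example's QuestionAnsweringTrainer.
trainer.metrics = tune_metric

# Early stopping, as added around the quantization step in the updated examples.
early_stopping_patience = 2
early_stopping_threshold = 0.001  # optional
trainer.add_callback(
    transformers.EarlyStoppingCallback(early_stopping_patience, early_stopping_threshold)
)

# `quantization_config` built as in the PostTrainingQuantConfig /
# QuantizationAwareTrainingConfig sketch above.
model = trainer.quantize(quant_config=quantization_config)
```

The related benchmark fix in this file reports only full batches when `dataloader_drop_last` is set: with, say, 1045 evaluation examples and a per-device eval batch size of 8, the reported sample count becomes 1040.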
@@ -121,7 +121,7 @@ else
echo "========== Prepare Model ${MODEL_NAME_OR_PATH} with Precision ${PRECISION} ========"
mode_cmd=""
if [[ ${PRECISION} = 'int8' ]]; then
mode_cmd=$mode_cmd" --tune --quantization_approach PostTrainingStatic"
mode_cmd=$mode_cmd" --tune --quantization_approach static"
elif [[ ${PRECISION} = 'fp32' ]]; then
mode_cmd=$mode_cmd" --fp32"
fi
@@ -26,7 +26,8 @@
import transformers
from dataclasses import dataclass, field
from datasets import load_dataset, load_metric
from intel_extension_for_transformers.transformers import metrics , OptimizedModel, QuantizationConfig
from intel_extension_for_transformers.transformers import metrics , OptimizedModel
from neural_compressor.config import PostTrainingQuantConfig
from trainer_qa import QuestionAnsweringTrainer
from transformers import (
AutoConfig,
@@ -211,9 +212,9 @@ class OptimizationArguments:
metadata={"help": "Whether or not to apply quantization."},
)
quantization_approach: Optional[str] = field(
default="PostTrainingStatic",
metadata={"help": "Quantization approach. Supported approach are PostTrainingStatic, "
"PostTrainingDynamic and QuantizationAwareTraining."},
default="static",
metadata={"help": "Quantization approach. Supported approach are static, "
"dynamic."},
)
metric_name: Optional[str] = field(
default="eval_f1",
@@ -640,27 +641,21 @@ def compute_metrics(p: EvalPrediction):

trainer.save_model(training_args.output_dir)
trainer.calib_dataloader = trainer.get_eval_dataloader()
if optim_args.quantization_approach != "PostTrainingDynamic":
if optim_args.quantization_approach != "dynamic":
if not training_args.do_train:
raise ValueError(
"do_train must be set to True for static and aware training quantization."
)
if optim_args.quantization_approach == "QuantizationAwareTraining":
early_stopping_patience = 6
early_stopping_threshold = 0.001 # optional
trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience,
early_stopping_threshold))

tune_metric = metrics.Metric(
name=metric_name, is_relative=optim_args.is_relative, criterion=optim_args.perf_tol
)
quantization_config = QuantizationConfig(
trainer.metrics = tune_metric
quantization_config = PostTrainingQuantConfig(
backend="ipex",
approach=optim_args.quantization_approach,
max_trials=200,
metrics=[tune_metric],
use_bf16=False
excluded_precisions=["bf16"]
)
quantization_config.framework = "pytorch_ipex"
model = trainer.quantize(quant_config=quantization_config)

if optim_args.benchmark or optim_args.accuracy_only:
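For the IPEX examples, the commit swaps the old `QuantizationConfig(use_bf16=False)` for a neural_compressor `PostTrainingQuantConfig` with the bf16 precision excluded and the framework pinned to pytorch_ipex. A short sketch of that configuration, with "static" as a placeholder for the approach read from the command line:

```python
from neural_compressor.config import PostTrainingQuantConfig

# IPEX-backed post-training configuration, as used by the updated example.
quantization_config = PostTrainingQuantConfig(
    backend="ipex",
    approach="static",                 # placeholder; taken from OptimizationArguments
    excluded_precisions=["bf16"],      # replaces the removed use_bf16=False flag
)
quantization_config.framework = "pytorch_ipex"  # set explicitly in the example

# model = trainer.quantize(quant_config=quantization_config)
```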
@@ -121,7 +121,7 @@ else
echo "========== Prepare Model ${MODEL_NAME_OR_PATH} with Precision ${PRECISION} ========"
mode_cmd=""
if [[ ${PRECISION} = 'int8' ]]; then
mode_cmd=$mode_cmd" --tune --quantization_approach PostTrainingStatic"
mode_cmd=$mode_cmd" --tune --quantization_approach static"
elif [[ ${PRECISION} = 'fp32' ]]; then
mode_cmd=$mode_cmd" --fp32"
fi
@@ -26,7 +26,8 @@
import transformers
from dataclasses import dataclass, field
from datasets import load_dataset, load_metric
from intel_extension_for_transformers.transformers import metrics , OptimizedModel, QuantizationConfig
from intel_extension_for_transformers.transformers import metrics , OptimizedModel
from neural_compressor.config import PostTrainingQuantConfig
from trainer_qa import QuestionAnsweringTrainer
from transformers import (
AutoConfig,
@@ -211,9 +212,9 @@ class OptimizationArguments:
metadata={"help": "Whether or not to apply quantization."},
)
quantization_approach: Optional[str] = field(
default="PostTrainingStatic",
metadata={"help": "Quantization approach. Supported approach are PostTrainingStatic, "
"PostTrainingDynamic and QuantizationAwareTraining."},
default="static",
metadata={"help": "Quantization approach. Supported approach are static, "
"dynamic."},
)
metric_name: Optional[str] = field(
default="eval_f1",
@@ -640,28 +641,21 @@ def compute_metrics(p: EvalPrediction):

trainer.save_model(training_args.output_dir)
trainer.calib_dataloader = trainer.get_eval_dataloader()
if optim_args.quantization_approach != "PostTrainingDynamic":
if optim_args.quantization_approach != "dynamic":
if not training_args.do_train:
raise ValueError(
"do_train must be set to True for static and aware training quantization."
)

elif optim_args.quantization_approach == "QuantizationAwareTraining":
early_stopping_patience = 6
early_stopping_threshold = 0.001 # optional
trainer.add_callback(transformers.EarlyStoppingCallback(early_stopping_patience,
early_stopping_threshold))

tune_metric = metrics.Metric(
name=metric_name, is_relative=optim_args.is_relative, criterion=optim_args.perf_tol
)
quantization_config = QuantizationConfig(
trainer.metrics = tune_metric
quantization_config = PostTrainingQuantConfig(
backend="ipex",
approach=optim_args.quantization_approach,
max_trials=200,
metrics=[tune_metric],
use_bf16=False
excluded_precisions=["bf16"]
)
quantization_config.framework = "pytorch_ipex"
model = trainer.quantize(quant_config=quantization_config)

if optim_args.benchmark or optim_args.accuracy_only:
@@ -121,7 +121,7 @@ else
echo "========== Prepare Model ${MODEL_NAME_OR_PATH} with Precision ${PRECISION} ========"
mode_cmd=""
if [[ ${PRECISION} = 'int8' ]]; then
mode_cmd=$mode_cmd" --tune --quantization_approach PostTrainingStatic"
mode_cmd=$mode_cmd" --tune --quantization_approach static"
elif [[ ${PRECISION} = 'fp32' ]]; then
mode_cmd=$mode_cmd" --fp32"
fi