From 27145c17163ae4e08a7150dcd85b33a977c54357 Mon Sep 17 00:00:00 2001 From: changwangss Date: Mon, 17 Jun 2024 06:29:48 -0700 Subject: [PATCH] remove code Signed-off-by: changwangss --- docs/distillation.md | 1 + docs/pruning.md | 41 ++++++------------- .../transformers/trainer.py | 3 -- 3 files changed, 14 insertions(+), 31 deletions(-) diff --git a/docs/distillation.md b/docs/distillation.md index 222259f837c..b74c5349f15 100644 --- a/docs/distillation.md +++ b/docs/distillation.md @@ -56,6 +56,7 @@ from neural_compressor.config import DistillationConfig # trainer = transformers.Trainer(......) trainer = NLPTrainer(......) metric = metrics.Metric(name="eval_accuracy") +trainer.metrics = metric d_conf = DistillationConfig(teacher_model=teacher_model, criterion=criterion) model = trainer.distill(distillation_config=d_conf) ``` diff --git a/docs/pruning.md b/docs/pruning.md index f7bdfe86cb6..f5909bb2008 100644 --- a/docs/pruning.md +++ b/docs/pruning.md @@ -7,32 +7,23 @@ Pruning ## Introduction Pruning is the process of removing redundant parameters of a network. The idea bears similarity to the ["optimal brain damage"](http://yann.lecun.com/exdb/publis/pdf/lecun-90b.pdf) hypothesis by Yann LeCun. There are two types of pruning: Unstructured and Structured. Unstructured pruning means finding and removing the less salient connection in the model, the place could be anywhere in the matrix. Structured pruning means deleting entire blocks, filters, or channels. -## Pruning types - -There are three pruning types in Intel® Extension for Transformers: - -- Magnitude (Unstructured) - - The algorithm prunes the weight by the lowest absolute value at each layer with a given sparsity target. - -- Group Lasso (Structured) - - The algorithm uses Group lasso regularization to prune entire rows, columns, or blocks of parameters that result in a smaller dense network. 
- -- Pattern Lock (Unstructured & Structured) - - The algorithm locks the sparsity pattern in fine tune phase by freezing those zero values of the weight tensor during the weight update of training. - ## Usage ### Script: ```python -from intel_extension_for_transformers.transformers import metrics, objectives, PrunerConfig, PruningConfig, +from intel_extension_for_transformers.transformers import metrics +from neural_compressor.config import WeightPruningConfig from intel_extension_for_transformers.transformers.trainer import NLPTrainer # Replace transformers.Trainer with NLPTrainer # trainer = transformers.Trainer(......) trainer = NLPTrainer(......) metric = metrics.Metric(name="eval_accuracy") -pruner_config = PrunerConfig(prune_type='BasicMagnitude', target_sparsity_ratio=0.9) -p_conf = PruningConfig(pruner_config=[pruner_config], metrics=metric) -model = trainer.prune(pruning_config=p_conf) +trainer.metrics = metric +pruning_conf = WeightPruningConfig([{"start_step": 0, "end_step": 2}], + target_sparsity=0.9, + pruning_scope="local", + pruning_type="magnitude") +model = trainer.prune(pruning_config=pruning_conf) ``` Please refer to [example](../examples/huggingface/pytorch/text-classification/pruning) for the details. @@ -46,28 +37,22 @@ The Metric defines which metric will be used to measure the performance of tuned Please refer to [metrics document](metrics.md) for the details. ### Create an instance of WeightPruningConfig -PrunerConfig defines which pruning algorithm to use and how to apply it during the training process. Intel® Extension for Transformers supports pruning types "BasicMagnitude", "PatternLock", and "GroupLasso". You can create different pruners for different layers. +[WeightPruningConfig](https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py) defines which pruning algorithm to use and how to apply it during the training process. 
Intel® Extension for Transformers supports pruning types "magnitude", "pattern_lock", and "GroupLasso". You can create different pruners for different layers. - arguments: |Argument |Type |Description |Default value | |:----------|:----------|:-----------------------------------------------|:----------------| - |epoch_range|list of integer|Which epochs to pruning |[0, 4] | - |initial_sparsity_ratio|float |Initial sparsity goal |0.0 | - |target_sparsity_ratio|float |Target sparsity goal |0.97 | + |pruning_configs |list of dicts|Local pruning settings, e.g. start_step and end_step |[{}] | + |target_sparsity |float |Target sparsity goal |0.90 | |update_frequency|integer|Frequency to updating sparsity |1 | - |prune_type|string|Pruning algorithm |'BasicMagnitude' | - |method|string|Pruning method |'per_tensor' | - |names|list of string|List of weight name to be pruned. If no weight is specified, all weights of the model will be pruned|[]| - |parameters|dict of string|The hyper-parameters for pruning, refer to [the link](https://github.com/intel/neural-compressor/blob/master/docs/source/pruning.md)|None| + |pruning_type |string|Pruning algorithm |'snip_momentum' | + The WeightPruningConfig contains all the information related to the model pruning behavior. If you have created Metric and WeightPruningConfig instance, then you can create an instance of WeightPruningConfig. Metric and pruner are optional. 
- example: ```python from neural_compressor.config import WeightPruningConfig - - metric = metrics.Metric(name="eval_accuracy") - trainer.metrics = tune_metric pruning_conf = WeightPruningConfig([{"start_step": 0, "end_step": 2}], target_sparsity=0,9, pruning_scope="local", diff --git a/intel_extension_for_transformers/transformers/trainer.py b/intel_extension_for_transformers/transformers/trainer.py index e3899e2a41f..0d14d3843e4 100644 --- a/intel_extension_for_transformers/transformers/trainer.py +++ b/intel_extension_for_transformers/transformers/trainer.py @@ -306,9 +306,6 @@ def quantize( if calib_dataloader is not None: self._calib_dataloader = calib_dataloader - if self.quantizer is None: - self._provider = Provider[provider.upper()].value - if self._provider == Provider.INC.value: return self._inc_quantize(quant_config=quant_config, provider=provider) else: