diff --git a/README.md b/README.md index fd487be94a..e06f91ef17 100644 --- a/README.md +++ b/README.md @@ -67,14 +67,6 @@ For more details on the supported compression techniques, please refer to the [d Below are the examples of how to use OpenVINO and its [NNCF](https://docs.openvino.ai/latest/tmo_introduction.html) framework to accelerate inference. -#### Export: - -It is possible to export your model to the [OpenVINO](https://docs.openvino.ai/2023.1/openvino_ir.html) IR format easily: - -```plain -optimum-cli export openvino --model distilbert-base-uncased-finetuned-sst-2-english ov_distilbert -``` - #### Inference: To load a model and run inference with OpenVINO Runtime, you can just replace your `AutoModelForXxx` class with the corresponding `OVModelForXxx` class. diff --git a/docs/source/optimization_inc.mdx b/docs/source/optimization_inc.mdx index de3be5f9ec..0867107661 100644 --- a/docs/source/optimization_inc.mdx +++ b/docs/source/optimization_inc.mdx @@ -137,43 +137,43 @@ The `INCTrainer` is very similar to the 🤗 Transformers [`Trainer`](https://hu To apply quantization during training, you only need to create the appropriate configuration and pass it to the `INCTrainer`. ```diff -import evaluate -import numpy as np -from datasets import load_dataset -from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, default_data_collator + import evaluate + import numpy as np + from datasets import load_dataset + from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, default_data_collator - from transformers import Trainer + from optimum.intel import INCModelForSequenceClassification, INCTrainer + from neural_compressor import QuantizationAwareTrainingConfig -model_id = "distilbert-base-uncased-finetuned-sst-2-english" -model = AutoModelForSequenceClassification.from_pretrained(model_id) -tokenizer = AutoTokenizer.from_pretrained(model_id) -dataset = load_dataset("glue", "sst2") -dataset = dataset.map(lambda examples: tokenizer(examples["sentence"], padding=True, max_length=128), batched=True) -metric = evaluate.load("glue", "sst2") -compute_metrics = lambda p: metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids) + model_id = "distilbert-base-uncased-finetuned-sst-2-english" + model = AutoModelForSequenceClassification.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) + dataset = load_dataset("glue", "sst2") + dataset = dataset.map(lambda examples: tokenizer(examples["sentence"], padding=True, max_length=128), batched=True) + metric = evaluate.load("glue", "sst2") + compute_metrics = lambda p: metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids) -# The directory where the quantized model will be saved -save_dir = "quantized_model" + # The directory where the quantized model will be saved + save_dir = "quantized_model" -# The configuration detailing the quantization process -+quantization_config = QuantizationAwareTrainingConfig() + # The configuration detailing the quantization process ++ quantization_config = QuantizationAwareTrainingConfig() - trainer = Trainer( + trainer = INCTrainer( - model=model, -+ quantization_config=quantization_config, - args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False), - train_dataset=dataset["train"].select(range(300)), - eval_dataset=dataset["validation"], - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=default_data_collator, -) - -train_result = trainer.train() -metrics = trainer.evaluate() -trainer.save_model() + model=model, ++ quantization_config=quantization_config, + args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False), + train_dataset=dataset["train"].select(range(300)), + eval_dataset=dataset["validation"], + compute_metrics=compute_metrics, + tokenizer=tokenizer, + data_collator=default_data_collator, + ) + + train_result = trainer.train() + metrics = trainer.evaluate() + trainer.save_model() - model = AutoModelForSequenceClassification.from_pretrained(save_dir) + model = INCModelForSequenceClassification.from_pretrained(save_dir) @@ -190,32 +190,32 @@ At the moment, pruning is applied on both the linear and the convolutional layer + from optimum.intel import INCTrainer + from neural_compressor import WeightPruningConfig -# The configuration detailing the pruning process + # The configuration detailing the pruning process + pruning_config = WeightPruningConfig( -+ pruning_type="magnitude", -+ start_step=0, -+ end_step=15, -+ target_sparsity=0.2, -+ pruning_scope="local", ++ pruning_type="magnitude", ++ start_step=0, ++ end_step=15, ++ target_sparsity=0.2, ++ pruning_scope="local", + ) - trainer = Trainer( + trainer = INCTrainer( - model=model, -+ pruning_config=pruning_config, - args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False), - train_dataset=dataset["train"].select(range(300)), - eval_dataset=dataset["validation"], - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=default_data_collator, -) - -train_result = trainer.train() -metrics = trainer.evaluate() -trainer.save_model() - -model = AutoModelForSequenceClassification.from_pretrained(save_dir) + model=model, ++ pruning_config=pruning_config, + args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False), + train_dataset=dataset["train"].select(range(300)), + eval_dataset=dataset["validation"], + compute_metrics=compute_metrics, + tokenizer=tokenizer, + data_collator=default_data_collator, + ) + + train_result = trainer.train() + metrics = trainer.evaluate() + trainer.save_model() + + model = AutoModelForSequenceClassification.from_pretrained(save_dir) ``` ### Knowledge distillation @@ -233,21 +233,21 @@ To know more about the different supported methodologies, you can refer to the N - trainer = Trainer( + trainer = INCTrainer( - model=model, -+ distillation_config=distillation_config, - args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False), - train_dataset=dataset["train"].select(range(300)), - eval_dataset=dataset["validation"], - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=default_data_collator, -) - -train_result = trainer.train() -metrics = trainer.evaluate() -trainer.save_model() - -model = AutoModelForSequenceClassification.from_pretrained(save_dir) + model=model, ++ distillation_config=distillation_config, + args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False), + train_dataset=dataset["train"].select(range(300)), + eval_dataset=dataset["validation"], + compute_metrics=compute_metrics, + tokenizer=tokenizer, + data_collator=default_data_collator, + ) + + train_result = trainer.train() + metrics = trainer.evaluate() + trainer.save_model() + + model = AutoModelForSequenceClassification.from_pretrained(save_dir) ``` ## Loading a quantized model