Fix format documentation #441

Merged · 3 commits · Sep 28, 2023
87 changes: 43 additions & 44 deletions README.md
@@ -54,10 +54,8 @@ To load a quantized model hosted locally or on the 🤗 hub, you can do as follows:
```python
from optimum.intel import INCModelForSequenceClassification

model_id = "Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-dynamic"
model = INCModelForSequenceClassification.from_pretrained(model_id)
```
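A quick sanity check is to run the loaded model through a standard `pipeline`; here is a minimal sketch, assuming the same model id as above and a matching tokenizer hosted alongside it:

```python
from transformers import AutoTokenizer, pipeline
from optimum.intel import INCModelForSequenceClassification

model_id = "Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-dynamic"
model = INCModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The quantized model is a drop-in replacement inside a text-classification pipeline
cls_pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(cls_pipe("He's a dreadful magician."))  # e.g. [{'label': 'NEGATIVE', 'score': ...}]
```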

You can load many more quantized models hosted on the hub under the Intel organization [`here`](https://huggingface.co/Intel).
@@ -77,15 +75,16 @@ If you want to load a PyTorch checkpoint, set `export=True` to convert your model to the OpenVINO IR.
```diff
- from transformers import AutoModelForSequenceClassification
+ from optimum.intel import OVModelForSequenceClassification
  from transformers import AutoTokenizer, pipeline

  model_id = "distilbert-base-uncased-finetuned-sst-2-english"
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  model.save_pretrained("./distilbert")

  classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
  results = classifier("He's a dreadful magician.")
```
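Once the IR has been saved with `save_pretrained`, it can be reloaded later without re-exporting; a minimal sketch, assuming the `./distilbert` directory created above:

```python
from optimum.intel import OVModelForSequenceClassification

# The directory already contains the exported OpenVINO IR, so export=True is not needed
model = OVModelForSequenceClassification.from_pretrained("./distilbert")
```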

#### Post-training static quantization:
@@ -98,7 +97,7 @@ from optimum.intel import OVQuantizer, OVModelForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
def preprocess_fn(examples, tokenizer):
    return tokenizer(
@@ -127,46 +126,46 @@ optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
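Most of the static quantization example is collapsed in the diff view above. For context, here is a minimal end-to-end sketch of post-training static quantization with `OVQuantizer`, reusing the `preprocess_fn` from the visible code (the output directory name is illustrative):

```python
from functools import partial

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.intel import OVModelForSequenceClassification, OVQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

def preprocess_fn(examples, tokenizer):
    return tokenizer(examples["sentence"], padding=True, truncation=True, max_length=128)

save_dir = "ov_static_quantization"  # illustrative output directory
quantizer = OVQuantizer.from_pretrained(model)
# A small calibration set is used to estimate the activation quantization parameters
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=partial(preprocess_fn, tokenizer=tokenizer),
    num_samples=300,
    dataset_split="train",
)
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
```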
Quantization-aware training (QAT) simulates the effects of quantization during training in order to mitigate its impact on the model’s accuracy. Here is an example of how to fine-tune a DistilBERT model on the SST-2 task while applying QAT.

```diff
  import evaluate
  import numpy as np
  from datasets import load_dataset
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, default_data_collator
- from transformers import Trainer
+ from optimum.intel import OVConfig, OVModelForSequenceClassification, OVTrainer

  model_id = "distilbert-base-uncased-finetuned-sst-2-english"
  model = AutoModelForSequenceClassification.from_pretrained(model_id)
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  dataset = load_dataset("glue", "sst2")
  dataset = dataset.map(
      lambda examples: tokenizer(examples["sentence"], padding=True, truncation=True, max_length=128), batched=True
  )
  metric = evaluate.load("glue", "sst2")
  compute_metrics = lambda p: metric.compute(
      predictions=np.argmax(p.predictions, axis=1), references=p.label_ids
  )

  # The directory where the quantized model will be saved
  save_dir = "nncf_results"

  # Load the default quantization configuration detailing the quantization we wish to apply
+ ov_config = OVConfig()

- trainer = Trainer(
+ trainer = OVTrainer(
      model=model,
      args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=True),
      train_dataset=dataset["train"].select(range(300)),
      eval_dataset=dataset["validation"],
      compute_metrics=compute_metrics,
      tokenizer=tokenizer,
      data_collator=default_data_collator,
+     ov_config=ov_config,
+     task="text-classification",
  )
  train_result = trainer.train()
  metrics = trainer.evaluate()
  trainer.save_model()

+ optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
```
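After training, the quantized model written to `save_dir` behaves like any other OpenVINO model; a minimal sketch, reusing `optimized_model` and `tokenizer` from the block above:

```python
from transformers import pipeline

# `optimized_model` and `tokenizer` are defined in the QAT example above
cls_pipe = pipeline("text-classification", model=optimized_model, tokenizer=tokenizer)
print(cls_pipe("He's a dreadful magician."))
```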
12 changes: 6 additions & 6 deletions docs/source/inference.mdx
@@ -22,16 +22,16 @@ Here is an example of how to perform inference with OpenVINO Runtime for a text classification task:
```diff
- from transformers import AutoModelForSequenceClassification
+ from optimum.intel import OVModelForSequenceClassification
  from transformers import AutoTokenizer, pipeline

  model_id = "distilbert-base-uncased-finetuned-sst-2-english"
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  cls_pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
  outputs = cls_pipe("He's a dreadful magician.")

  [{'label': 'NEGATIVE', 'score': 0.9919503927230835}]
```

See the [reference documentation](reference_ov) for more information about parameters, and examples for different tasks.
128 changes: 64 additions & 64 deletions docs/source/optimization_inc.mdx
@@ -137,43 +137,43 @@ The `INCTrainer` is very similar to the 🤗 Transformers [`Trainer`](https://hu
To apply quantization during training, you only need to create the appropriate configuration and pass it to the `INCTrainer`.

```diff
  import evaluate
  import numpy as np
  from datasets import load_dataset
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, default_data_collator
- from transformers import Trainer
+ from optimum.intel import INCModelForSequenceClassification, INCTrainer
+ from neural_compressor import QuantizationAwareTrainingConfig

  model_id = "distilbert-base-uncased-finetuned-sst-2-english"
  model = AutoModelForSequenceClassification.from_pretrained(model_id)
  tokenizer = AutoTokenizer.from_pretrained(model_id)
  dataset = load_dataset("glue", "sst2")
  dataset = dataset.map(lambda examples: tokenizer(examples["sentence"], padding=True, max_length=128), batched=True)
  metric = evaluate.load("glue", "sst2")
  compute_metrics = lambda p: metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

  # The directory where the quantized model will be saved
  save_dir = "quantized_model"

  # The configuration detailing the quantization process
+ quantization_config = QuantizationAwareTrainingConfig()

- trainer = Trainer(
+ trainer = INCTrainer(
      model=model,
+     quantization_config=quantization_config,
      args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False),
      train_dataset=dataset["train"].select(range(300)),
      eval_dataset=dataset["validation"],
      compute_metrics=compute_metrics,
      tokenizer=tokenizer,
      data_collator=default_data_collator,
  )

  train_result = trainer.train()
  metrics = trainer.evaluate()
  trainer.save_model()

- model = AutoModelForSequenceClassification.from_pretrained(save_dir)
+ model = INCModelForSequenceClassification.from_pretrained(save_dir)
```
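As with the OpenVINO example, the quantized model can then be used directly for inference; a minimal sketch, reusing `save_dir` and `tokenizer` from the block above:

```python
from transformers import pipeline
from optimum.intel import INCModelForSequenceClassification

model = INCModelForSequenceClassification.from_pretrained(save_dir)
cls_pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(cls_pipe("He's a dreadful magician."))
```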
@@ -190,32 +190,32 @@ At the moment, pruning is applied on both the linear and the convolutional layers

```diff
+ from optimum.intel import INCTrainer
+ from neural_compressor import WeightPruningConfig

  # The configuration detailing the pruning process
+ pruning_config = WeightPruningConfig(
+     pruning_type="magnitude",
+     start_step=0,
+     end_step=15,
+     target_sparsity=0.2,
+     pruning_scope="local",
+ )

- trainer = Trainer(
+ trainer = INCTrainer(
      model=model,
+     pruning_config=pruning_config,
      args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False),
      train_dataset=dataset["train"].select(range(300)),
      eval_dataset=dataset["validation"],
      compute_metrics=compute_metrics,
      tokenizer=tokenizer,
      data_collator=default_data_collator,
  )

  train_result = trainer.train()
  metrics = trainer.evaluate()
  trainer.save_model()

  model = AutoModelForSequenceClassification.from_pretrained(save_dir)
```
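One way to check that magnitude pruning took effect is to measure the fraction of exactly-zero weights in the linear layers; a minimal PyTorch sketch (the expected figure simply mirrors the `target_sparsity=0.2` set above):

```python
import torch

# Count zero-valued weights across all linear layers of the pruned model
zeros, total = 0, 0
for module in model.modules():
    if isinstance(module, torch.nn.Linear):
        zeros += int((module.weight == 0).sum())
        total += module.weight.numel()
print(f"Linear-layer sparsity: {zeros / total:.2%}")  # roughly 20% with target_sparsity=0.2
```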
### Knowledge distillation

@@ -233,21 +233,21 @@ To know more about the different supported methodologies, you can refer to the Neural Compressor documentation.

```diff
- trainer = Trainer(
+ trainer = INCTrainer(
      model=model,
+     distillation_config=distillation_config,
      args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=False),
      train_dataset=dataset["train"].select(range(300)),
      eval_dataset=dataset["validation"],
      compute_metrics=compute_metrics,
      tokenizer=tokenizer,
      data_collator=default_data_collator,
  )

  train_result = trainer.train()
  metrics = trainer.evaluate()
  trainer.save_model()

  model = AutoModelForSequenceClassification.from_pretrained(save_dir)
```
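This hunk starts after the configuration is created, so `distillation_config` itself is not shown. For context, a minimal sketch of how it could be built with `neural_compressor` (the teacher model id here is only an illustration):

```python
from transformers import AutoModelForSequenceClassification
from neural_compressor import DistillationConfig

# A larger model fine-tuned on the same task, used here as an illustrative teacher
teacher_model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-SST-2")
distillation_config = DistillationConfig(teacher_model=teacher_model)
```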

## Loading a quantized model