Add compressor and new API (#82)
* add intel neural compressor

* add new API

* add ignore_compilers to the new API

* Sparsity support (#80)

* Added CI/CD pipeline with GitHub Actions

* fix numpy version

* Added Dockerfile + DeepSparse support

* added loss comparison for sparsity

* remove dockerfile

* fix

* fix import error

* fix error with sparsity

* remove unnecessary space

Co-authored-by: Valerio Sofi <[email protected]>
Co-authored-by: Diego Fiori <[email protected]>

* add comments and sparseml

* deprecate old API versions

* deprecate old API versions

* rename metric

* change docstring

* adapt docstrings and steps

* adapt code to review comments and fix error with sparseml

* add non-compressed model as output of CompressorStep

* change version to 0.4.0

* improve code stability

* fix bug with HF

* fix behaviour for negative metric_drop

* Update readme

* Minor changes

* Delete benchmark from github

* edit docstrings

* Add bladedisc support (#85)

* add bladedisc support

* remove patch

* fix errors in bladedisc optimizer

Authored-by: Valerio Sofi <[email protected]>

* fix small bug

* Add API example

Co-authored-by: morgoth95 <[email protected]>
Co-authored-by: Valerio Sofi <[email protected]>
Co-authored-by: Nebuly <[email protected]>
4 people authored Jul 26, 2022
1 parent 09065cc commit 7842560
Showing 41 changed files with 2,970 additions and 677 deletions.
255 changes: 76 additions & 179 deletions README.md

Large diffs are not rendered by default.

275 changes: 20 additions & 255 deletions nebullvm/api/frontend/huggingface.py
@@ -1,29 +1,25 @@
from collections import OrderedDict
import warnings
from tempfile import TemporaryDirectory
from typing import (
    Tuple,
    Union,
    List,
    Iterable,
    Dict,
    Any,
    Type,
    Callable,
    Optional,
    Sequence,
)

import numpy as np
import torch

from nebullvm import optimize_torch_model
from nebullvm.api.frontend.utils import ifnone, QUANTIZATION_METRIC_MAP
from nebullvm.base import DataType, ModelCompiler
from nebullvm.inference_learners.base import (
    PytorchBaseInferenceLearner,
    InferenceLearnerWrapper,
    LearnerMetadata,
from nebullvm.api.huggingface import (
    _flatten_outputs,
    _TransformerWrapper,
    _get_output_structure_from_text,
    HuggingFaceInferenceLearner,
    _HFTextDataset,
)
from nebullvm.api.utils import ifnone, QUANTIZATION_METRIC_MAP
from nebullvm.base import DataType, ModelCompiler
from nebullvm.optimizers.extra import HuggingFaceOptimizer

try:
@@ -35,203 +31,6 @@
    PreTrainedTokenizer = None


def _flatten_outputs(
    outputs: Union[torch.Tensor, Iterable]
) -> List[torch.Tensor]:
    new_outputs = []
    for output in outputs:
        if isinstance(output, torch.Tensor):
            new_outputs.append(output)
        else:
            flatten_list = _flatten_outputs(output)
            new_outputs.extend(flatten_list)
    return new_outputs
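
# A minimal illustration of _flatten_outputs (editor sketch; tensor values
# are arbitrary): nested containers of tensors are flattened depth-first.
#
#     nested = (torch.ones(1), (torch.zeros(2), torch.ones(3)))
#     _flatten_outputs(nested)
#     # -> [tensor([1.]), tensor([0., 0.]), tensor([1., 1., 1.])]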


class _TransformerWrapper(torch.nn.Module):
    """Class for wrapping Transformers models and giving them an API
    compatible with nebullvm. The class takes the positional arguments of
    the forward method as input and transforms them into the input
    dictionaries needed by the transformers classes. At the end it also
    flattens their output.
    """

    def __init__(
        self,
        core_model: torch.nn.Module,
        encoded_input: Dict[str, torch.Tensor],
    ):
        super().__init__()
        self.core_model = core_model
        self.inputs_types = OrderedDict()
        for key, value in encoded_input.items():
            self.inputs_types[key] = value.dtype

    def forward(self, *args: torch.Tensor):
        inputs = {
            key: value for key, value in zip(self.inputs_types.keys(), args)
        }
        outputs = self.core_model(**inputs)
        return tuple(_flatten_outputs(outputs.values()))
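
# Hypothetical usage sketch for _TransformerWrapper (the model and tokenizer
# names are assumptions, not part of this diff): the wrapper records the
# dtype of each tokenizer output once, so later calls can pass plain
# positional tensors.
#
#     encoded = tokenizer(["hello world"], return_tensors="pt")
#     wrapped = _TransformerWrapper(core_model=model, encoded_input=encoded)
#     flat_outputs = wrapped(*encoded.values())  # tuple of torch.Tensor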


def _get_size_recursively(
    tensor_tuple: Union[torch.Tensor, Tuple]
) -> List[int]:
    if isinstance(tensor_tuple[0], torch.Tensor):
        return [len(tensor_tuple)]
    else:
        inner_size = _get_size_recursively(tensor_tuple[0])
        return [len(tensor_tuple), *inner_size]
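
# Sketch: _get_size_recursively returns the nesting sizes down to the first
# torch.Tensor leaf, e.g. a 2x3 tuple-of-tuples of tensors gives [2, 3]:
#
#     t = tuple(tuple(torch.zeros(1) for _ in range(3)) for _ in range(2))
#     _get_size_recursively(t)  # -> [2, 3]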


def _get_output_structure(
    text: str,
    model: PreTrainedModel,
    tokenizer: PreTrainedTokenizer,
    tokenizer_args: Dict,
) -> Tuple[OrderedDict, Type]:
    """Function for saving the output structure of the transformers model
    in a dictionary.
    """
    encoded_input = tokenizer([text], **tokenizer_args)
    output = model(**encoded_input)
    structure = OrderedDict()
    for key, value in output.items():
        if isinstance(value, torch.Tensor):
            structure[key] = None
        else:
            size = _get_size_recursively(value)
            structure[key] = size
    return structure, type(output)
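
# Sketch of the resulting structure for a causal LM returning `logits` (a
# tensor) and nested `past_key_values` (sizes and type below are
# illustrative, not taken from this diff):
#
#     structure    # -> OrderedDict([("logits", None), ("past_key_values", [12, 2])])
#     output_type  # -> e.g. transformers' CausalLMOutputWithPast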


def _restructure_output(
    output: Tuple[torch.Tensor],
    structure: OrderedDict,
    output_type: Any = None,
):
    """Restructure the flattened output using the structure dictionary
    given as input.
    """
    output_dict = {}
    idx = 0
    for key, value in structure.items():
        if value is None:
            output_dict[key] = output[idx]
            idx += 1
        else:
            output_dict[key] = (
                np.array(
                    output[idx : int(np.prod(value)) + idx],  # noqa E203
                    dtype=object,
                )
                .reshape(value)
                .tolist()
            )
            idx += np.prod(value)
    if output_type is not None:
        return output_type(**output_dict)
    return output_dict
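
# Sketch: _restructure_output inverts the flattening, rebuilding the
# original HuggingFace output object from the flat tensor tuple (names
# reuse the sketches above):
#
#     flat = wrapped(*encoded.values())
#     hf_output = _restructure_output(flat, structure, output_type)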


class HuggingFaceInferenceLearner(InferenceLearnerWrapper):
    """Class wrapping an InferenceLearner model and giving it the
    HuggingFace interface.

    The class fuses the InferenceLearner and HuggingFace interfaces, giving
    the final user a model which can be used with the preferred API without
    the need to adapt the previous code.

    Attributes:
        network_parameters (ModelParams): Model parameters of the model.
        core_inference_learner (PytorchBaseInferenceLearner): Inference
            learner built using the Pytorch interface.
        output_structure (Dict): Original output structure of the
            HuggingFace model.
        input_names (List[str]): List of all the input keys used for the
            original HuggingFace model.
        output_type (Any, optional): Original output type of the
            HuggingFace model.
    """

    def __init__(
        self,
        core_inference_learner: PytorchBaseInferenceLearner,
        output_structure: OrderedDict,
        input_names: List[str],
        output_type: Any = None,
    ):
        super().__init__(core_inference_learner)
        self.output_structure = output_structure
        self.input_names = input_names
        self.output_type = output_type

    def _save_wrapper_extra_info(self):
        pass

    @staticmethod
    def _load_wrapper_extra_info(builder_inputs: Dict) -> Dict:
        return builder_inputs

    def run(self, *args, **kwargs) -> Any:
        """Run the underlying optimized model to get a prediction.

        The method has a hybrid interface: it accepts inputs either as
        positional or keyword arguments. If only positional arguments are
        given, the method expects the inputs to be in the canonical
        nebullvm interface. If only keyword arguments are given, the method
        expects them to be in the HuggingFace interface. A mixed
        representation is not allowed and will result in an error.
        """
        if len(args) > 0 and len(kwargs) > 0:
            raise RuntimeError(
                "Invalid usage of the predict method. Inputs must be given "
                "either as positional or as keyword arguments, not both."
            )
        if len(args) > 0:
            return self.core_inference_learner(*args)
        inputs = (kwargs.pop(name) for name in self.input_names)
        outputs = self.core_inference_learner(*inputs)
        return _restructure_output(
            outputs, self.output_structure, self.output_type
        )
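
    # Sketch of the hybrid interface described above (hypothetical learner
    # and tensors):
    #
    #     learner.run(ids, mask)                           # nebullvm style
    #     learner.run(input_ids=ids, attention_mask=mask)  # HuggingFace style
    #     learner.run(ids, attention_mask=mask)            # -> RuntimeError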

    def _get_extra_metadata_kwargs(self) -> Dict:
        metadata_kwargs = {
            "output_structure": self.output_structure,
            "output_structure_keys": list(self.output_structure.keys()),
            "input_names": self.input_names,
        }
        if self.output_type is not None:
            metadata_kwargs.update(
                {
                    "output_type": self.output_type.__name__,
                    "output_type_module": self.output_type.__module__,
                }
            )
        return metadata_kwargs

    @staticmethod
    def _convert_metadata_to_inputs(metadata: LearnerMetadata) -> Dict:
        # we need to guarantee the preservation of the output structure
        # elements order.
        output_structure = OrderedDict()
        for key in metadata["output_structure_keys"]:
            output_structure[key] = metadata["output_structure"][key]

        inputs = {
            "output_structure": output_structure,
            "input_names": metadata["input_names"],
        }
        if metadata["output_type"] is not None:
            exec(
                f"from {metadata['output_type_module']} "
                f"import {metadata['output_type']}"
            )
            inputs["output_type"] = eval(metadata["output_type"])
        return inputs
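
# Round-trip sketch: the metadata stores the output type by name and module,
# so loading can re-import it, e.g. (illustrative values only)
# output_type="SequenceClassifierOutput",
# output_type_module="transformers.modeling_outputs".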


def _get_dynamic_axis(
    text: str,
    tokenizer: PreTrainedTokenizer,
@@ -302,45 +101,6 @@ def _get_extra_optimizer(
    return [HuggingFaceOptimizer(hugging_face_params={})]


class _HFDataset(Sequence):
    def __init__(
        self,
        input_texts: List,
        ys: Optional[List],
        keywords: List[str],
        batch_size: int,
        tokenizer: PreTrainedTokenizer,
        tokenizer_args: Dict,
    ):
        self._input_texts = input_texts
        self._ys = ys
        self._bs = batch_size
        self._keys = keywords
        self._tokenizer = tokenizer
        if self._tokenizer.pad_token is None:
            self._tokenizer.pad_token = self._tokenizer.eos_token
        _tokenizer_args = {"truncation": True, "padding": True}
        _tokenizer_args.update(tokenizer_args)
        self._tokenizer_args = _tokenizer_args

    def __getitem__(self, item: int):
        pointer = self._bs * item
        if pointer >= len(self):
            raise IndexError
        mini_batch = self._input_texts[
            pointer : pointer + self._bs  # noqa E203
        ]
        if self._ys is not None:
            mini_batch_y = self._ys[pointer : pointer + self._bs]  # noqa E203
        else:
            mini_batch_y = None
        encoded_inputs = self._tokenizer(mini_batch, **self._tokenizer_args)
        return tuple(encoded_inputs[key] for key in self._keys), mini_batch_y

    def __len__(self):
        return len(self._input_texts)
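
# Batching sketch: with batch_size=2 and five texts, item 0 encodes texts
# [0:2], item 1 texts [2:4], item 2 the remaining text; each item returns
# the encoded tensors for the requested keys plus matching labels (or None).
#
#     ds = _HFDataset(texts, ys, ["input_ids", "attention_mask"], 2,
#                     tokenizer, {})
#     (input_ids, attention_mask), batch_ys = ds[0]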


def optimize_huggingface_model(
    model: PreTrainedModel,
    tokenizer: PreTrainedTokenizer,
@@ -371,7 +131,7 @@ def optimize_huggingface_model(
        tokenizer (PreTrainedTokenizer): Tokenizer used for building the
            model's inputs.
        input_texts (List[str]): Texts either from the training set or
            similar to the ones contained in the text. If the
            metric_drop_ths is passed, the input_texts will be used for
            computing the drop in precision and for setting the
            quantization parameters. If you selected a quantization metric
            needing the input labels you need to
@@ -409,24 +169,29 @@ def optimize_huggingface_model(
            performed, since no data is given as input.
        perf_metric (Union[Callable, str], optional): The metric to
            be used for accepting or refusing a precision-reduction
            optimization proposal. If none is given but a `metric_drop_ths`
            is received, the `nebullvm.measure.compute_relative_difference`
            metric will be used as the default one. A user-defined metric
            can be passed as a function accepting as inputs two tuples of
            tensors (produced by the baseline and the quantized model) and
            the related original labels.
            For more information see
            `nebullvm.measure.compute_relative_difference` and
            `nebullvm.measure.compute_accuracy_drop`. `metric`
            accepts as value also a string containing the metric name. At
            the current stage the supported metrics are
            `"numeric_precision"` and `"accuracy"`.
        ys: List of target labels. For each input in `input_texts` there
            should be the corresponding label. Note that this feature is
            just used for estimating the accuracy drop while running
            precision-reduction techniques. It will be ignored if these
            techniques are not activated.
    """
"""
    warnings.warn(
        "Deprecated: The usage of the HuggingFace API is deprecated. "
        "`optimize_huggingface_model` will be removed in the next release. "
        "Use `optimize_model` instead."
    )
    if perf_loss_ths is not None and ys is None and perf_metric == "accuracy":
        raise ValueError(
            "You cannot select the accuracy as quantization metric without "
@@ -436,7 +201,7 @@ def optimize_huggingface_model(
    perf_metric = QUANTIZATION_METRIC_MAP.get(perf_metric)
    tokenizer_args = tokenizer_args or {}
    tokenizer_args.update({"return_tensors": "pt"})
    output_structure, output_type = _get_output_structure_from_text(
        text=input_texts[0],
        model=model,
        tokenizer=tokenizer,
@@ -470,7 +235,7 @@ def optimize_huggingface_model(
        else None,
        perf_loss_ths=perf_loss_ths,
        perf_metric=perf_metric,
        dataloader=_HFTextDataset(
            input_texts,
            ys,
            list(wrapper_model.inputs_types.keys()),
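
# Migration sketch for the deprecation notice above: the new unified API
# added by this PR replaces this frontend. Import path and signature are
# assumed here, not shown in this diff:
#
#     from nebullvm.api.functions import optimize_model  # path assumed
#     optimized_model = optimize_model(
#         model, input_data=input_texts, metric_drop_ths=metric_drop_ths
#     )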
