Add compressor and new API (#82)
* add intel neural compressor

* add new API

* add ignore_compilers to the new API

* Sparsity support (#80)

* Added CI/CD pipeline with GitHub Actions

* fix numpy version

* Added Dockerfile + DeepSparse support

* added loss comparison for sparsity

* remove dockerfile

* fix

* fix import error

* fix error with sparsity

* remove unnecessary space

Co-authored-by: Valerio Sofi <[email protected]>
Co-authored-by: Diego Fiori <[email protected]>

* add comments and sparseml

* deprecate old API versions

* deprecate old API versions

* rename metric

* change docstring

* adapt docstrings and steps

* adapt code to review comments and fix error with sparseml

* add non-compressed model as output of CompressorStep

* change version to 0.4.0

* improve code stability

* fix bug with HF

* fix behaviour for negative metric_drop

* Update readme

* Minor changes

* Delete benchmark from github

* edit docstrings

* Add bladedisc support (#85)

* add bladedisc support

* remove patch

* fix errors in bladedisc optimizer

Authored-by: Valerio Sofi <[email protected]>

* fix small bug

* Add API example

Co-authored-by: morgoth95 <[email protected]>
Co-authored-by: Valerio Sofi <[email protected]>
Co-authored-by: Nebuly <[email protected]>
4 people authored Jul 26, 2022
1 parent 09065cc commit 7842560
Showing 41 changed files with 2,970 additions and 677 deletions.
255 changes: 76 additions & 179 deletions README.md

Large diffs are not rendered by default.

275 changes: 20 additions & 255 deletions nebullvm/api/frontend/huggingface.py
@@ -1,29 +1,25 @@
from collections import OrderedDict
import warnings
from tempfile import TemporaryDirectory
from typing import (
    Tuple,
    Union,
    List,
    Iterable,
    Dict,
    Any,
    Type,
    Callable,
    Optional,
    Sequence,
)

import numpy as np
import torch

from nebullvm import optimize_torch_model
from nebullvm.api.frontend.utils import ifnone, QUANTIZATION_METRIC_MAP
from nebullvm.base import DataType, ModelCompiler
from nebullvm.inference_learners.base import (
    PytorchBaseInferenceLearner,
    InferenceLearnerWrapper,
    LearnerMetadata,
from nebullvm.api.huggingface import (
    _flatten_outputs,
    _TransformerWrapper,
    _get_output_structure_from_text,
    HuggingFaceInferenceLearner,
    _HFTextDataset,
)
from nebullvm.api.utils import ifnone, QUANTIZATION_METRIC_MAP
from nebullvm.base import DataType, ModelCompiler
from nebullvm.optimizers.extra import HuggingFaceOptimizer

try:
@@ -35,203 +31,6 @@
    PreTrainedTokenizer = None


def _flatten_outputs(
    outputs: Union[torch.Tensor, Iterable]
) -> List[torch.Tensor]:
    new_outputs = []
    for output in outputs:
        if isinstance(output, torch.Tensor):
            new_outputs.append(output)
        else:
            flatten_list = _flatten_outputs(output)
            new_outputs.extend(flatten_list)
    return new_outputs
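
# A minimal illustration of _flatten_outputs (editor sketch; tensor values
# are arbitrary): nested containers of tensors are flattened depth-first.
#
#     nested = (torch.ones(1), (torch.zeros(2), torch.ones(3)))
#     _flatten_outputs(nested)
#     # -> [tensor([1.]), tensor([0., 0.]), tensor([1., 1., 1.])]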


class _TransformerWrapper(torch.nn.Module):
    """Class for wrapping Transformers models and giving them an API
    compatible with nebullvm. The class takes the positional arguments of
    the forward method as input and transforms them into the input
    dictionaries needed by the transformers classes. At the end it also
    flattens their output.
    """

    def __init__(
        self,
        core_model: torch.nn.Module,
        encoded_input: Dict[str, torch.Tensor],
    ):
        super().__init__()
        self.core_model = core_model
        self.inputs_types = OrderedDict()
        for key, value in encoded_input.items():
            self.inputs_types[key] = value.dtype

    def forward(self, *args: torch.Tensor):
        inputs = {
            key: value for key, value in zip(self.inputs_types.keys(), args)
        }
        outputs = self.core_model(**inputs)
        return tuple(_flatten_outputs(outputs.values()))
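
# Hypothetical usage sketch for _TransformerWrapper (the model and tokenizer
# names are assumptions, not part of this diff): the wrapper records the
# dtype of each tokenizer output once, so later calls can pass plain
# positional tensors.
#
#     encoded = tokenizer(["hello world"], return_tensors="pt")
#     wrapped = _TransformerWrapper(core_model=model, encoded_input=encoded)
#     flat_outputs = wrapped(*encoded.values())  # tuple of torch.Tensor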


def _get_size_recursively(
    tensor_tuple: Union[torch.Tensor, Tuple]
) -> List[int]:
    if isinstance(tensor_tuple[0], torch.Tensor):
        return [len(tensor_tuple)]
    else:
        inner_size = _get_size_recursively(tensor_tuple[0])
        return [len(tensor_tuple), *inner_size]
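
# Sketch: _get_size_recursively returns the nesting sizes down to the first
# torch.Tensor leaf, e.g. a 2x3 tuple-of-tuples of tensors gives [2, 3]:
#
#     t = tuple(tuple(torch.zeros(1) for _ in range(3)) for _ in range(2))
#     _get_size_recursively(t)  # -> [2, 3]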


def _get_output_structure(
    text: str,
    model: PreTrainedModel,
    tokenizer: PreTrainedTokenizer,
    tokenizer_args: Dict,
) -> Tuple[OrderedDict, Type]:
    """Function for saving the output structure of the transformers model
    in a dictionary.
    """
    encoded_input = tokenizer([text], **tokenizer_args)
    output = model(**encoded_input)
    structure = OrderedDict()
    for key, value in output.items():
        if isinstance(value, torch.Tensor):
            structure[key] = None
        else:
            size = _get_size_recursively(value)
            structure[key] = size
    return structure, type(output)
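
# Sketch of the resulting structure for a causal LM returning `logits` (a
# tensor) and nested `past_key_values` (sizes and type below are
# illustrative, not taken from this diff):
#
#     structure    # -> OrderedDict([("logits", None), ("past_key_values", [12, 2])])
#     output_type  # -> e.g. transformers' CausalLMOutputWithPast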


def _restructure_output(
    output: Tuple[torch.Tensor],
    structure: OrderedDict,
    output_type: Any = None,
):
    """Restructure the flattened output using the structure dictionary
    given as input.
    """
    output_dict = {}
    idx = 0
    for key, value in structure.items():
        if value is None:
            output_dict[key] = output[idx]
            idx += 1
        else:
            output_dict[key] = (
                np.array(
                    output[idx : int(np.prod(value)) + idx],  # noqa E203
                    dtype=object,
                )
                .reshape(value)
                .tolist()
            )
            idx += np.prod(value)
    if output_type is not None:
        return output_type(**output_dict)
    return output_dict
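
# Sketch: _restructure_output inverts the flattening, rebuilding the
# original HuggingFace output object from the flat tensor tuple (names
# reuse the sketches above):
#
#     flat = wrapped(*encoded.values())
#     hf_output = _restructure_output(flat, structure, output_type)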


class HuggingFaceInferenceLearner(InferenceLearnerWrapper):
    """Class wrapping an InferenceLearner model and giving it the
    HuggingFace interface.

    The class fuses the InferenceLearner and HuggingFace interfaces, giving
    the final user a model which can be used with the preferred API without
    the need to adapt the previous code.

    Attributes:
        network_parameters (ModelParams): Model parameters of the model.
        core_inference_learner (PytorchBaseInferenceLearner): Inference
            learner built using the Pytorch interface.
        output_structure (Dict): Original output structure of the
            HuggingFace model.
        input_names (List[str]): List of all the input keys used for the
            original HuggingFace model.
        output_type (Any, optional): Original output type of the
            HuggingFace model.
    """

    def __init__(
        self,
        core_inference_learner: PytorchBaseInferenceLearner,
        output_structure: OrderedDict,
        input_names: List[str],
        output_type: Any = None,
    ):
        super().__init__(core_inference_learner)
        self.output_structure = output_structure
        self.input_names = input_names
        self.output_type = output_type

    def _save_wrapper_extra_info(self):
        pass

    @staticmethod
    def _load_wrapper_extra_info(builder_inputs: Dict) -> Dict:
        return builder_inputs

    def run(self, *args, **kwargs) -> Any:
        """Run the underlying optimized model to get a prediction.

        The method has a hybrid interface: it accepts inputs either as
        positional or keyword arguments. If only positional arguments are
        given, the method expects the inputs to be in the canonical
        nebullvm interface. If only keyword arguments are given, the method
        expects them to be in the HuggingFace interface. A mixed
        representation is not allowed and will result in an error.
        """
        if len(args) > 0 and len(kwargs) > 0:
            raise RuntimeError(
                "Invalid usage of the predict method. Inputs must be given "
                "either as positional or as keyword arguments, not both."
            )
        if len(args) > 0:
            return self.core_inference_learner(*args)
        inputs = (kwargs.pop(name) for name in self.input_names)
        outputs = self.core_inference_learner(*inputs)
        return _restructure_output(
            outputs, self.output_structure, self.output_type
        )
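
    # Sketch of the hybrid interface described above (hypothetical learner
    # and tensors):
    #
    #     learner.run(ids, mask)                           # nebullvm style
    #     learner.run(input_ids=ids, attention_mask=mask)  # HuggingFace style
    #     learner.run(ids, attention_mask=mask)            # -> RuntimeError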

    def _get_extra_metadata_kwargs(self) -> Dict:
        metadata_kwargs = {
            "output_structure": self.output_structure,
            "output_structure_keys": list(self.output_structure.keys()),
            "input_names": self.input_names,
        }
        if self.output_type is not None:
            metadata_kwargs.update(
                {
                    "output_type": self.output_type.__name__,
                    "output_type_module": self.output_type.__module__,
                }
            )
        return metadata_kwargs

    @staticmethod
    def _convert_metadata_to_inputs(metadata: LearnerMetadata) -> Dict:
        # we need to guarantee the preservation of the output structure
        # elements order.
        output_structure = OrderedDict()
        for key in metadata["output_structure_keys"]:
            output_structure[key] = metadata["output_structure"][key]

        inputs = {
            "output_structure": output_structure,
            "input_names": metadata["input_names"],
        }
        if metadata["output_type"] is not None:
            exec(
                f"from {metadata['output_type_module']} "
                f"import {metadata['output_type']}"
            )
            inputs["output_type"] = eval(metadata["output_type"])
        return inputs
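
# Round-trip sketch: the metadata stores the output type by name and module,
# so loading can re-import it, e.g. (illustrative values only)
# output_type="SequenceClassifierOutput",
# output_type_module="transformers.modeling_outputs".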


def _get_dynamic_axis(
    text: str,
    tokenizer: PreTrainedTokenizer,
@@ -302,45 +101,6 @@ def _get_extra_optimizer(
    return [HuggingFaceOptimizer(hugging_face_params={})]


class _HFDataset(Sequence):
    def __init__(
        self,
        input_texts: List,
        ys: Optional[List],
        keywords: List[str],
        batch_size: int,
        tokenizer: PreTrainedTokenizer,
        tokenizer_args: Dict,
    ):
        self._input_texts = input_texts
        self._ys = ys
        self._bs = batch_size
        self._keys = keywords
        self._tokenizer = tokenizer
        if self._tokenizer.pad_token is None:
            self._tokenizer.pad_token = self._tokenizer.eos_token
        _tokenizer_args = {"truncation": True, "padding": True}
        _tokenizer_args.update(tokenizer_args)
        self._tokenizer_args = _tokenizer_args

    def __getitem__(self, item: int):
        pointer = self._bs * item
        if pointer >= len(self):
            raise IndexError
        mini_batch = self._input_texts[
            pointer : pointer + self._bs  # noqa E203
        ]
        if self._ys is not None:
            mini_batch_y = self._ys[pointer : pointer + self._bs]  # noqa E203
        else:
            mini_batch_y = None
        encoded_inputs = self._tokenizer(mini_batch, **self._tokenizer_args)
        return tuple(encoded_inputs[key] for key in self._keys), mini_batch_y

    def __len__(self):
        return len(self._input_texts)
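
# Batching sketch: with batch_size=2 and five texts, item 0 encodes texts
# [0:2], item 1 texts [2:4], item 2 the remaining text; each item returns
# the encoded tensors for the requested keys plus matching labels (or None).
#
#     ds = _HFDataset(texts, ys, ["input_ids", "attention_mask"], 2,
#                     tokenizer, {})
#     (input_ids, attention_mask), batch_ys = ds[0]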


def optimize_huggingface_model(
    model: PreTrainedModel,
    tokenizer: PreTrainedTokenizer,
@@ -371,7 +131,7 @@ def optimize_huggingface_model(
        tokenizer (PreTrainedTokenizer): Tokenizer used for building the
            model's inputs.
        input_texts (List[str]): Texts either from the training set or
            similar to the ones contained in the text. If the
            metric_drop_ths is passed, the input_texts will be used for
            computing the drop in precision and for setting the
            quantization parameters. If you selected a quantization metric
            needing the input labels you need to
@@ -409,24 +169,29 @@ def optimize_huggingface_model(
            performed, since no data is given as input.
        perf_metric (Union[Callable, str], optional): The metric to
            be used for accepting or refusing a precision-reduction
            optimization proposal. If none is given but a `metric_drop_ths`
            is received, the `nebullvm.measure.compute_relative_difference`
            metric will be used as the default one. A user-defined metric
            can be passed as a function accepting as inputs two tuples of
            tensors (produced by the baseline and the quantized model) and
            the related original labels.
            For more information see
            `nebullvm.measure.compute_relative_difference` and
            `nebullvm.measure.compute_accuracy_drop`. `metric`
            accepts as value also a string containing the metric name. At
            the current stage the supported metrics are
            `"numeric_precision"` and `"accuracy"`.
        ys: List of target labels. For each input in `input_texts` there
            should be the corresponding label. Note that this feature is
            just used for estimating the accuracy drop while running
            precision-reduction techniques. It will be ignored if these
            techniques are not activated.
    """
"""
    warnings.warn(
        "Deprecated: The usage of the HuggingFace API is deprecated. "
        "`optimize_huggingface_model` will be removed in the next release. "
        "Use `optimize_model` instead."
    )
    if perf_loss_ths is not None and ys is None and perf_metric == "accuracy":
        raise ValueError(
            "You cannot select the accuracy as quantization metric without "
@@ -436,7 +201,7 @@ def optimize_huggingface_model(
    perf_metric = QUANTIZATION_METRIC_MAP.get(perf_metric)
    tokenizer_args = tokenizer_args or {}
    tokenizer_args.update({"return_tensors": "pt"})
    output_structure, output_type = _get_output_structure_from_text(
        text=input_texts[0],
        model=model,
        tokenizer=tokenizer,
@@ -470,7 +235,7 @@ def optimize_huggingface_model(
        else None,
        perf_loss_ths=perf_loss_ths,
        perf_metric=perf_metric,
        dataloader=_HFTextDataset(
            input_texts,
            ys,
            list(wrapper_model.inputs_types.keys()),
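
# Migration sketch for the deprecation notice above: the new unified API
# added by this PR replaces this frontend. Import path and signature are
# assumed here, not shown in this diff:
#
#     from nebullvm.api.functions import optimize_model  # path assumed
#     optimized_model = optimize_model(
#         model, input_data=input_texts, metric_drop_ths=metric_drop_ths
#     )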
