Add Dockerfile and implement TFLite and TorchScript as model-backends (#64)

* add tf and torch backends

* add torch backend

* add tqdm to requirements

* avoid installation of compilers when NO_COMPILER_INSTALLATION is set

* fix error with onnx installation

* fix error with onnx installation

* fix error with onnx installation

* fix error with onnx installation

* Added Dockerfile & bugfix (#63)

* add dockerfile

* fix tvm configs issue in the tvm installer

* fix tvm issue

* fix tvm

* fix dockerfile & create build script for the docker images

* removed redundant spaces

Co-authored-by: Valerio Sofi <[email protected]>

* add tflite to tf api

* fix logging

* fix error with half precision in torch

* fix minor bugs

* fix bugs

* fix import

* fix bug with tf

* fix error with DeviceArrays in polygraphy

* fix another bug

* upgrade version

Co-authored-by: morgoth95 <[email protected]>
Co-authored-by: Valerio Sofi <[email protected]>
3 people authored Jun 28, 2022
1 parent 362cfb1 commit 06fc486
Showing 32 changed files with 1,019 additions and 159 deletions.
Binary file added .DS_Store
3 changes: 3 additions & 0 deletions .gitignore
@@ -128,3 +128,6 @@ dmypy.json

# Pyre type checker
.pyre/

# MacOS DS_Store
.DS_Store
46 changes: 46 additions & 0 deletions Dockerfile
@@ -0,0 +1,46 @@
ARG STARTING_IMAGE=nvidia/cuda:11.2.0-runtime-ubuntu20.04
FROM ${STARTING_IMAGE}

# Set frontend as non-interactive
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update

# Install python and pip
RUN apt-get install -y python3-opencv python3-pip && \
    python3 -m pip install --upgrade pip && \
    apt-get -y install git

# Install nebullvm
ARG NEBULLVM_VERSION=latest
RUN if [ "$NEBULLVM_VERSION" = "latest" ] ; then \
        # pip install nebullvm ; \
        git clone https://github.com/nebuly-ai/nebullvm.git ; \
        cd nebullvm ; \
        pip install . ; \
    else \
        pip install nebullvm==${NEBULLVM_VERSION} ; \
    fi

# Install required python modules
RUN pip install scipy==1.5.4 && \
    pip install cmake

# Install default deep learning compilers
ARG COMPILER=all
ENV NO_COMPILER_INSTALLATION=1
RUN if [ "$COMPILER" = "all" ] ; then \
        python3 -c "import os; os.environ['NO_COMPILER_INSTALLATION'] = '0'; import nebullvm" ; \
    elif [ "$COMPILER" = "tensorrt" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_tensor_rt; install_tensor_rt()" ; \
    elif [ "$COMPILER" = "openvino" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_openvino; install_openvino()" ; \
    elif [ "$COMPILER" = "onnxruntime" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_onnxruntime; install_onnxruntime()" ; \
    fi

# Install TVM
RUN if [ "$COMPILER" = "all" ] || [ "$COMPILER" = "tvm" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_tvm; install_tvm()" ; \
        python3 -c "from tvm.runtime import Module" ; \
    fi
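Note the pattern the compiler stages above rely on: nebullvm auto-installs its supported compilers at import time unless NO_COMPILER_INSTALLATION is set, which is why the Dockerfile exports the variable and then either re-enables it inline or calls the individual installers. A minimal sketch of the same calls outside Docker, using only functions already referenced in the Dockerfile:

import os

os.environ["NO_COMPILER_INSTALLATION"] = "0"  # re-enable import-time installation
import nebullvm  # noqa: E402 - the import itself installs the default compilers

# Or install a single compiler explicitly, mirroring the Dockerfile branches:
from nebullvm.installers.installers import install_onnxruntime  # noqa: E402

install_onnxruntime()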
8 changes: 8 additions & 0 deletions docker_build.sh
@@ -0,0 +1,8 @@
# Create image with all compilers installed
docker build -t nebullvm-all .

# Create an image for each compiler installed
docker build -t nebullvm-onnxruntime . --build-arg COMPILER="onnxruntime"
docker build -t nebullvm-openvino . --build-arg COMPILER="openvino"
docker build -t nebullvm-tvm . --build-arg COMPILER="tvm"
docker build -t nebullvm-tensorrt . --build-arg COMPILER="tensorrt"
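Once built, each image can be smoke-tested by importing nebullvm inside a container. A sketch driven from Python (the image name comes from docker_build.sh above; the --gpus flag assumes the NVIDIA container toolkit is installed, since the base image is a CUDA runtime):

import subprocess

# Run a throwaway container and verify that nebullvm imports cleanly.
subprocess.run(
    [
        "docker", "run", "--rm", "--gpus", "all",
        "nebullvm-all",
        "python3", "-c", "import nebullvm; print('nebullvm OK')",
    ],
    check=True,
)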
2 changes: 1 addition & 1 deletion nebullvm/api/frontend/onnx.py
@@ -225,7 +225,7 @@ def optimize_onnx_model(
        )
        if model_optimizer.usable:
            model_optimized = model_optimizer.optimize(
-               onnx_model=str(onnx_path),
+               model=str(onnx_path),
                output_library=dl_library,
                model_params=model_params,
                input_tfms=input_tfms,
120 changes: 119 additions & 1 deletion nebullvm/api/frontend/tf.py
@@ -1,9 +1,13 @@
import logging
import os
import warnings
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import List, Tuple, Union, Dict, Optional, Callable, Any

import numpy as np
import tensorflow as tf
from tqdm import tqdm

from nebullvm.api.frontend.utils import (
    ifnone,
@@ -15,9 +19,13 @@
    ModelParams,
    InputInfo,
    ModelCompiler,
    QuantizationType,
)
from nebullvm.converters import ONNXConverter
from nebullvm.inference_learners import TensorflowBaseInferenceLearner
from nebullvm.measure import compute_optimized_running_time
from nebullvm.optimizers import BaseOptimizer
from nebullvm.optimizers.tensorflow import TensorflowBackendOptimizer
from nebullvm.transformations.base import MultiStageTransformation
from nebullvm.utils.data import DataManager
from nebullvm.utils.tf import (
@@ -27,6 +35,12 @@
)
from nebullvm.optimizers.multi_compiler import MultiCompilerOptimizer

logging.basicConfig(
    format="%(asctime)s %(message)s", datefmt="%d/%m/%Y %I:%M:%S %p"
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
@@ -216,19 +230,123 @@ def optimize_tf_model(
        ignore_compilers=ignore_compilers,
        extra_optimizers=custom_optimizers,
        debug_mode=int(os.environ.get("DEBUG_MODE", "0")) > 0,
        logger=logger,
    )
    with TemporaryDirectory() as tmp_dir:
        logger.info("Running Optimization using tensorflow interface (1/3)")
        if perf_loss_ths is not None:
            q_types = [
                None,
                QuantizationType.DYNAMIC,
                QuantizationType.HALF,
            ]
            if dataset is not None:
                q_types.append(QuantizationType.STATIC)
        else:
            q_types = [None]
        tf_res = [
            _tf_api_optimization(
                model, model_params, perf_loss_ths, q_type, False, input_data
            )
            for q_type in tqdm(q_types)
        ]
        (tf_api_model, tf_api_latency, used_compilers) = sorted(
            tf_res, key=lambda x: x[1]
        )[0]
        ignore_compilers.extend(used_compilers)
        logger.info("Running Optimization using ONNX interface (2/3)")
        onnx_path = model_converter.convert(
            model, model_params.input_sizes, Path(tmp_dir)
        )
        model_optimized = model_optimizer.optimize(
-           onnx_model=str(onnx_path),
+           model=str(onnx_path),
            output_library=dl_library,
            model_params=model_params,
            input_tfms=input_tfms,
            perf_loss_ths=perf_loss_ths,
            perf_metric=perf_metric,
            input_data=input_data,
        )
        logger.info("Running comparison between optimized models (3/3).")
        model_optimized = _compare_optimized_models(
            model_optimized, tf_api_model, tf_api_latency
        )
        if model_optimized is None:
            raise RuntimeError(
                "No valid compiled model has been produced. "
                "Look at the logs for further information about the failure."
            )
        model_optimized.save(save_dir)
        return model_optimized.load(save_dir)


def _compare_optimized_models(
    new_model: TensorflowBaseInferenceLearner,
    previous_best_model: TensorflowBaseInferenceLearner,
    previous_latency: float,
) -> TensorflowBaseInferenceLearner:
    if new_model is not None:
        new_latency = compute_optimized_running_time(new_model)
        if new_latency < previous_latency:
            return new_model
    return previous_best_model


def _get_optimizers_supporting_tf_api(use_extra_compilers: bool):
    if use_extra_compilers:
        logger.warning(
            "No compiler found supporting the tensorflow interface."
        )
    return [(ModelCompiler.TFLITE, TensorflowBackendOptimizer(logger=logger))]


def _tf_api_optimization(
    model: tf.Module,
    model_params: ModelParams,
    quantization_ths: float,
    quantization_type: QuantizationType,
    use_extra_compilers: bool,
    input_data: DataManager,
) -> Tuple[Optional[TensorflowBaseInferenceLearner], float, List]:
    used_compilers = []
    best_tf_opt_model = None
    best_latency = np.inf
    for compiler, optimizer in tqdm(
        _get_optimizers_supporting_tf_api(use_extra_compilers)
    ):
        try:
            if hasattr(optimizer, "optimize_from_tf"):
                candidate_model = optimizer.optimize_from_tf(
                    model=model,
                    model_params=model_params,
                    perf_loss_ths=quantization_ths
                    if quantization_type is not None
                    else None,
                    quantization_type=quantization_type,
                    input_data=input_data,
                )
            else:
                candidate_model = optimizer.optimize(
                    model=model,
                    output_library=DeepLearningFramework.TENSORFLOW,
                    model_params=model_params,
                    perf_loss_ths=quantization_ths
                    if quantization_type is not None
                    else None,
                    quantization_type=quantization_type,
                    input_data=input_data,
                )
            candidate_latency = compute_optimized_running_time(candidate_model)
            if candidate_latency < best_latency:
                best_latency = candidate_latency
                best_tf_opt_model = candidate_model
            used_compilers.append(compiler)
        except Exception as ex:
            warnings.warn(
                f"Compilation failed with the tensorflow interface of "
                f"{compiler}. Got error {ex}. If possible, the compilation "
                "will be rescheduled with the ONNX interface. Please consult "
                "the documentation for further info or open an issue on "
                "GitHub to receive assistance."
            )
    return best_tf_opt_model, best_latency, used_compilers
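With this change the TensorFlow frontend first compiles through the TFLite backend natively (step 1/3), then through the ONNX route (step 2/3), and keeps whichever model measures faster (step 3/3). A hedged usage sketch: perf_loss_ths and save_dir appear in the diff above, while batch_size and input_sizes are assumed from nebullvm's other frontends and may differ:

import tensorflow as tf

from nebullvm.api.frontend.tf import optimize_tf_model

model = tf.keras.applications.ResNet50()

# perf_loss_ths is not None, so the dynamic/half (and, with a dataset,
# static) quantization sweep shown above is enabled.
optimized_model = optimize_tf_model(
    model,
    batch_size=1,
    input_sizes=[(224, 224, 3)],
    save_dir=".",
    perf_loss_ths=2,
)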
