Add Dockerfile and implement TFLite and TorchScript as model-backends (#64)

* add tf and torch backends

* add torch backend

* add tqdm to requirements

* avoid installation of compilers when NO_COMPILER_INSTALLATION is set

* fix error with onnx installation

* fix error with onnx installation

* fix error with onnx installation

* fix error with onnx installation

* Added Dockerfile & bugfix (#63)

* add dockerfile

* fix tvm configs issue in the tvm installer

* fix tvm issue

* fix tvm

* fix dockerfile & create build script for the docker images

* removed redundant spaces

Co-authored-by: Valerio Sofi <[email protected]>

* add tflite to tf api

* fix logging

* fix error with half precision in torch

* fix minor bugs

* fix bugs

* fix import

* fix bug with tf

* fix error with DeviceArrays in polygraphy

* fix another bug

* upgrade version

Co-authored-by: morgoth95 <[email protected]>
Co-authored-by: Valerio Sofi <[email protected]>
3 people authored Jun 28, 2022
1 parent 362cfb1 commit 06fc486
Showing 32 changed files with 1,019 additions and 159 deletions.
Binary file added .DS_Store
3 changes: 3 additions & 0 deletions .gitignore
@@ -128,3 +128,6 @@ dmypy.json

# Pyre type checker
.pyre/

# MacOS DS_Store
.DS_Store
46 changes: 46 additions & 0 deletions Dockerfile
@@ -0,0 +1,46 @@
ARG STARTING_IMAGE=nvidia/cuda:11.2.0-runtime-ubuntu20.04
FROM ${STARTING_IMAGE}

# Set frontend as non-interactive
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update

# Install python and pip
RUN apt-get install -y python3-opencv python3-pip && \
    python3 -m pip install --upgrade pip && \
    apt-get -y install git

# Install nebullvm
ARG NEBULLVM_VERSION=latest
RUN if [ "$NEBULLVM_VERSION" = "latest" ] ; then \
        # pip install nebullvm ; \
        git clone https://github.com/nebuly-ai/nebullvm.git ; \
        cd nebullvm ; \
        pip install . ; \
    else \
        pip install nebullvm==${NEBULLVM_VERSION} ; \
    fi

# Install required python modules
RUN pip install scipy==1.5.4 && \
    pip install cmake

# Install default deep learning compilers
ARG COMPILER=all
ENV NO_COMPILER_INSTALLATION=1
RUN if [ "$COMPILER" = "all" ] ; then \
        python3 -c "import os; os.environ['NO_COMPILER_INSTALLATION'] = '0'; import nebullvm" ; \
    elif [ "$COMPILER" = "tensorrt" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_tensor_rt; install_tensor_rt()" ; \
    elif [ "$COMPILER" = "openvino" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_openvino; install_openvino()" ; \
    elif [ "$COMPILER" = "onnxruntime" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_onnxruntime; install_onnxruntime()" ; \
    fi

# Install TVM
RUN if [ "$COMPILER" = "all" ] || [ "$COMPILER" = "tvm" ] ; then \
        python3 -c "from nebullvm.installers.installers import install_tvm; install_tvm()" ; \
        python3 -c "from tvm.runtime import Module" ; \
    fi
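Note the pattern the compiler stages above rely on: nebullvm auto-installs its supported compilers at import time unless NO_COMPILER_INSTALLATION is set, which is why the Dockerfile exports the variable and then either re-enables it inline or calls the individual installers. A minimal sketch of the same calls outside Docker, using only functions already referenced in the Dockerfile:

import os

os.environ["NO_COMPILER_INSTALLATION"] = "0"  # re-enable import-time installation
import nebullvm  # noqa: E402 - the import itself installs the default compilers

# Or install a single compiler explicitly, mirroring the Dockerfile branches:
from nebullvm.installers.installers import install_onnxruntime  # noqa: E402

install_onnxruntime()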
8 changes: 8 additions & 0 deletions docker_build.sh
@@ -0,0 +1,8 @@
# Create image with all compilers installed
docker build -t nebullvm-all .

# Create an image for each compiler installed
docker build -t nebullvm-onnxruntime . --build-arg COMPILER="onnxruntime"
docker build -t nebullvm-openvino . --build-arg COMPILER="openvino"
docker build -t nebullvm-tvm . --build-arg COMPILER="tvm"
docker build -t nebullvm-tensorrt . --build-arg COMPILER="tensorrt"
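Once built, each image can be smoke-tested by importing nebullvm inside a container. A sketch driven from Python (the image name comes from docker_build.sh above; the --gpus flag assumes the NVIDIA container toolkit is installed, since the base image is a CUDA runtime):

import subprocess

# Run a throwaway container and verify that nebullvm imports cleanly.
subprocess.run(
    [
        "docker", "run", "--rm", "--gpus", "all",
        "nebullvm-all",
        "python3", "-c", "import nebullvm; print('nebullvm OK')",
    ],
    check=True,
)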
2 changes: 1 addition & 1 deletion nebullvm/api/frontend/onnx.py
@@ -225,7 +225,7 @@ def optimize_onnx_model(
        )
        if model_optimizer.usable:
            model_optimized = model_optimizer.optimize(
-               onnx_model=str(onnx_path),
+               model=str(onnx_path),
                output_library=dl_library,
                model_params=model_params,
                input_tfms=input_tfms,
120 changes: 119 additions & 1 deletion nebullvm/api/frontend/tf.py
@@ -1,9 +1,13 @@
import logging
import os
import warnings
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import List, Tuple, Union, Dict, Optional, Callable, Any

import numpy as np
import tensorflow as tf
from tqdm import tqdm

from nebullvm.api.frontend.utils import (
    ifnone,
@@ -15,9 +19,13 @@
    ModelParams,
    InputInfo,
    ModelCompiler,
    QuantizationType,
)
from nebullvm.converters import ONNXConverter
from nebullvm.inference_learners import TensorflowBaseInferenceLearner
from nebullvm.measure import compute_optimized_running_time
from nebullvm.optimizers import BaseOptimizer
from nebullvm.optimizers.tensorflow import TensorflowBackendOptimizer
from nebullvm.transformations.base import MultiStageTransformation
from nebullvm.utils.data import DataManager
from nebullvm.utils.tf import (
@@ -27,6 +35,12 @@
)
from nebullvm.optimizers.multi_compiler import MultiCompilerOptimizer

logging.basicConfig(
    format="%(asctime)s %(message)s", datefmt="%d/%m/%Y %I:%M:%S %p"
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
@@ -216,19 +230,123 @@ def optimize_tf_model(
        ignore_compilers=ignore_compilers,
        extra_optimizers=custom_optimizers,
        debug_mode=int(os.environ.get("DEBUG_MODE", "0")) > 0,
        logger=logger,
    )
    with TemporaryDirectory() as tmp_dir:
        logger.info("Running Optimization using tensorflow interface (1/3)")
        if perf_loss_ths is not None:
            q_types = [
                None,
                QuantizationType.DYNAMIC,
                QuantizationType.HALF,
            ]
            if dataset is not None:
                q_types.append(QuantizationType.STATIC)
        else:
            q_types = [None]
        tf_res = [
            _tf_api_optimization(
                model, model_params, perf_loss_ths, q_type, False, input_data
            )
            for q_type in tqdm(q_types)
        ]
        (tf_api_model, tf_api_latency, used_compilers) = sorted(
            tf_res, key=lambda x: x[1]
        )[0]
        ignore_compilers.extend(used_compilers)
        logger.info("Running Optimization using ONNX interface (2/3)")
        onnx_path = model_converter.convert(
            model, model_params.input_sizes, Path(tmp_dir)
        )
        model_optimized = model_optimizer.optimize(
-           onnx_model=str(onnx_path),
+           model=str(onnx_path),
            output_library=dl_library,
            model_params=model_params,
            input_tfms=input_tfms,
            perf_loss_ths=perf_loss_ths,
            perf_metric=perf_metric,
            input_data=input_data,
        )
        logger.info("Running comparison between optimized models (3/3).")
        model_optimized = _compare_optimized_models(
            model_optimized, tf_api_model, tf_api_latency
        )
        if model_optimized is None:
            raise RuntimeError(
                "No valid compiled model has been produced. "
                "Look at the logs for further information about the failure."
            )
        model_optimized.save(save_dir)
        return model_optimized.load(save_dir)


def _compare_optimized_models(
    new_model: TensorflowBaseInferenceLearner,
    previous_best_model: TensorflowBaseInferenceLearner,
    previous_latency: float,
) -> TensorflowBaseInferenceLearner:
    if new_model is not None:
        new_latency = compute_optimized_running_time(new_model)
        if new_latency < previous_latency:
            return new_model
    return previous_best_model


def _get_optimizers_supporting_tf_api(use_extra_compilers: bool):
    if use_extra_compilers:
        logger.warning(
            "No compiler found supporting the tensorflow interface."
        )
    return [(ModelCompiler.TFLITE, TensorflowBackendOptimizer(logger=logger))]


def _tf_api_optimization(
    model: tf.Module,
    model_params: ModelParams,
    quantization_ths: float,
    quantization_type: QuantizationType,
    use_extra_compilers: bool,
    input_data: DataManager,
) -> Tuple[Optional[TensorflowBaseInferenceLearner], float, List]:
    used_compilers = []
    best_tf_opt_model = None
    best_latency = np.inf
    for compiler, optimizer in tqdm(
        _get_optimizers_supporting_tf_api(use_extra_compilers)
    ):
        try:
            if hasattr(optimizer, "optimize_from_tf"):
                candidate_model = optimizer.optimize_from_tf(
                    model=model,
                    model_params=model_params,
                    perf_loss_ths=quantization_ths
                    if quantization_type is not None
                    else None,
                    quantization_type=quantization_type,
                    input_data=input_data,
                )
            else:
                candidate_model = optimizer.optimize(
                    model=model,
                    output_library=DeepLearningFramework.TENSORFLOW,
                    model_params=model_params,
                    perf_loss_ths=quantization_ths
                    if quantization_type is not None
                    else None,
                    quantization_type=quantization_type,
                    input_data=input_data,
                )
            candidate_latency = compute_optimized_running_time(candidate_model)
            if candidate_latency < best_latency:
                best_latency = candidate_latency
                best_tf_opt_model = candidate_model
            used_compilers.append(compiler)
        except Exception as ex:
            warnings.warn(
                f"Compilation failed with the tensorflow interface of "
                f"{compiler}. Got error {ex}. If possible, the compilation "
                "will be rescheduled with the ONNX interface. Please consult "
                "the documentation for further info or open an issue on "
                "GitHub to receive assistance."
            )
    return best_tf_opt_model, best_latency, used_compilers
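With this change the TensorFlow frontend first compiles through the TFLite backend natively (step 1/3), then through the ONNX route (step 2/3), and keeps whichever model measures faster (step 3/3). A hedged usage sketch: perf_loss_ths and save_dir appear in the diff above, while batch_size and input_sizes are assumed from nebullvm's other frontends and may differ:

import tensorflow as tf

from nebullvm.api.frontend.tf import optimize_tf_model

model = tf.keras.applications.ResNet50()

# perf_loss_ths is not None, so the dynamic/half (and, with a dataset,
# static) quantization sweep shown above is enabled.
optimized_model = optimize_tf_model(
    model,
    batch_size=1,
    input_sizes=[(224, 224, 3)],
    save_dir=".",
    perf_loss_ths=2,
)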
