Fixes for YOLOv8 (#144)

* Fixed links in the Speedster README and added a save/load example to all notebooks
* Fixed bugs in the TorchScript and TensorRT compilers
* Added a notebook for YOLOv8
* Changed the input size in the notebook
nebuly-ai · Jan 10, 2023 · 7dc5b8a · 7dc5b8a
1 parent e50d9ca
commit 7dc5b8a
Show file tree

Hide file tree

Showing 7 changed files with 383 additions and 9 deletions.
diff --git a/nebullvm/config.py b/nebullvm/config.py
@@ -1,7 +1,7 @@
 from nebullvm.optional_modules.torch import torch
 
 
-VERSION = "0.7.0"
+VERSION = "0.7.1"
 LEARNER_METADATA_FILENAME = "metadata.json"
 ONNX_OPSET_VERSION = 13
 NEBULLVM_DEBUG_FILE = "nebullvm_debug.json"

diff --git a/nebullvm/operations/optimizations/compilers/pytorch.py b/nebullvm/operations/optimizations/compilers/pytorch.py
@@ -76,16 +76,22 @@ def execute(
                 model, quantization_type, input_tfms, train_input_data
             )
 
-        self.compiled_model = self._compile_model(model, input_data)
+        self.compiled_model = self._compile_model(
+            model, input_data, quantization_type
+        )
 
     def _compile_model(
         self,
         model: Union[Module, GraphModule],
         input_data: DataManager,
+        quantization_type: QuantizationType,
     ) -> ScriptModule:
         input_sample = input_data.get_list(1)[0]
         if self.device is Device.GPU:
-            input_sample = [t.cuda() for t in input_sample]
+            if quantization_type is QuantizationType.HALF:
+                input_sample = [t.cuda().half() for t in input_sample]
+            else:
+                input_sample = [t.cuda() for t in input_sample]
 
         if not isinstance(model, torch.fx.GraphModule):
             model.eval()

diff --git a/nebullvm/operations/optimizations/compilers/tensor_rt.py b/nebullvm/operations/optimizations/compilers/tensor_rt.py
@@ -1,4 +1,5 @@
 import abc
+import copy
 import os
 import subprocess
 from pathlib import Path
@@ -153,7 +154,9 @@ def _compile_model(
 
         with torch_tensorrt.logging.errors():
             trt_model = torch_tensorrt.compile(
-                model,
+                model
+                if dtype is not torch.half
+                else copy.deepcopy(model).half(),
                 inputs=[
                     torch_tensorrt.Input(
                         tensor.shape,

diff --git a/...elerate_PyTorch_YOLO_with_Speedster.ipynb → ...erate_PyTorch_YOLOv5_with_Speedster.ipynb b/...elerate_PyTorch_YOLO_with_Speedster.ipynb → ...erate_PyTorch_YOLOv5_with_Speedster.ipynb