From 8c3ba59aaf189b5a7e6ce88763f2d9acdf90a3cc Mon Sep 17 00:00:00 2001
From: gulou <113007768+liaogulou@users.noreply.github.com>
Date: Tue, 31 Oct 2023 14:56:08 +0800
Subject: [PATCH] Features/self test onnx (#330)

Add an ONNX export method for YOLOX, plus ONNX Runtime inference support in easycv/predictors/detector.py.
---
 configs/config_templates/yolox_itag.py | 78 +++++++++++++-------------
 easycv/apis/export.py                  | 31 +++++++++-
 easycv/predictors/detector.py          | 37 ++++++++++--
 requirements/runtime.txt               |  1 +
 tests/test_tools/test_predict.py       |  6 +-
 5 files changed, 101 insertions(+), 52 deletions(-)

diff --git a/configs/config_templates/yolox_itag.py b/configs/config_templates/yolox_itag.py
index fff720b2..b190edfb 100644
--- a/configs/config_templates/yolox_itag.py
+++ b/configs/config_templates/yolox_itag.py
@@ -49,14 +49,14 @@
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 
 train_pipeline = [
-    dict(type='MMMosaic', img_scale='${img_scale}', pad_val=114.0),
+    dict(type='MMMosaic', img_scale=tuple(img_scale), pad_val=114.0),
     dict(
         type='MMRandomAffine',
-        scaling_ratio_range='${scale_ratio}',
-        border=['-${img_scale}[0] // 2', '-${img_scale}[1] // 2']),
+        scaling_ratio_range=scale_ratio,
+        border=[img_scale[0] // 2, img_scale[1] // 2]),
     dict(
         type='MMMixUp',  # s m x l; tiny nano will detele
-        img_scale='${img_scale}',
+        img_scale=tuple(img_scale),
         ratio_range=(0.8, 1.6),
         pad_val=114.0),
     dict(
@@ -70,45 +70,43 @@
     dict(type='MMPad', pad_to_square=True, pad_val=(114.0, 114.0, 114.0)),
     dict(
         type='MMNormalize',
-        mean='${img_norm_cfg.mean}',
-        std='${img_norm_cfg.std}',
-        to_rgb='${img_norm_cfg.to_rgb}'),
+        mean=img_norm_cfg['mean'],
+        std=img_norm_cfg['std'],
+        to_rgb=img_norm_cfg['to_rgb']),
     dict(type='DefaultFormatBundle'),
     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
 ]
 test_pipeline = [
-    dict(type='MMResize', img_scale='${img_scale}', keep_ratio=True),
+    dict(type='MMResize', img_scale=img_scale, keep_ratio=True),
     dict(type='MMPad', pad_to_square=True, pad_val=(114.0, 114.0, 114.0)),
     dict(
         type='MMNormalize',
-        mean='${img_norm_cfg.mean}',
-        std='${img_norm_cfg.std}',
-        to_rgb='${img_norm_cfg.to_rgb}'),
+        mean=img_norm_cfg['mean'],
+        std=img_norm_cfg['std'],
+        to_rgb=img_norm_cfg['to_rgb']),
     dict(type='DefaultFormatBundle'),
     dict(type='Collect', keys=['img'])
 ]
 
+train_path = 'data/coco/train2017.manifest'
+val_path = 'data/coco/val2017.manifest'
+
+train_dataset = dict(
+    type='DetImagesMixDataset',
+    data_source=dict(type='DetSourcePAI', path=train_path, classes=CLASSES),
+    pipeline=train_pipeline,
+    dynamic_scale=tuple(img_scale))
+
+val_dataset = dict(
+    type='DetImagesMixDataset',
+    imgs_per_gpu=2,
+    data_source=dict(type='DetSourcePAI', path=val_path, classes=CLASSES),
+    pipeline=test_pipeline,
+    dynamic_scale=None,
+    label_padding=False)
+
 data = dict(
-    imgs_per_gpu=16,
-    workers_per_gpu=4,
-    train=dict(
-        type='DetImagesMixDataset',
-        data_source=dict(
-            type='DetSourcePAI',
-            path='data/coco/train2017.manifest',
-            classes='${CLASSES}'),
-        pipeline='${train_pipeline}',
-        dynamic_scale='${img_scale}'),
-    val=dict(
-        type='DetImagesMixDataset',
-        imgs_per_gpu=2,
-        data_source=dict(
-            type='DetSourcePAI',
-            path='data/coco/val2017.manifest',
-            classes='${CLASSES}'),
-        pipeline='${test_pipeline}',
-        dynamic_scale=None,
-        label_padding=False))
+    imgs_per_gpu=16, workers_per_gpu=4, train=train_dataset, val=val_dataset)
 
 # additional hooks
 interval = 10
@@ -120,14 +118,14 @@
         priority=48),
     dict(
         type='SyncRandomSizeHook',
-        ratio_range='${random_size}',
-        img_scale='${img_scale}',
-        interval='${interval}',
+        ratio_range=random_size,
+        img_scale=img_scale,
+        interval=interval,
         priority=48),
     dict(
         type='SyncNormHook',
         num_last_epochs=15,
-        interval='${interval}',
+        interval=interval,
         priority=48)
 ]
 
@@ -135,23 +133,23 @@
 vis_num = 20
 score_thr = 0.5
 eval_config = dict(
-    interval='${interval}',
+    interval=interval,
     gpu_collect=False,
     visualization_config=dict(
-        vis_num='${vis_num}',
-        score_thr='${score_thr}',
+        vis_num=vis_num,
+        score_thr=score_thr,
     )  # show by TensorboardLoggerHookV2
 )
 
 eval_pipelines = [
     dict(
         mode='test',
-        data='${data.val}',
+        data=val_dataset,
         evaluators=[dict(type='CocoDetectionEvaluator', classes=CLASSES)],
     )
 ]
 
-checkpoint_config = dict(interval='${interval}')
+checkpoint_config = dict(interval=interval)
 # optimizer
 # basic_lr_per_img = 0.01 / 64.0
 optimizer = dict(
diff --git a/easycv/apis/export.py b/easycv/apis/export.py
index 3f8ffb07..0cdc4da7 100644
--- a/easycv/apis/export.py
+++ b/easycv/apis/export.py
@@ -247,10 +247,10 @@ def _export_yolox(model, cfg, filename):
 
     if hasattr(cfg, 'export'):
         export_type = getattr(cfg.export, 'export_type', 'raw')
-        default_export_type_list = ['raw', 'jit', 'blade']
+        default_export_type_list = ['raw', 'jit', 'blade', 'onnx']
         if export_type not in default_export_type_list:
             logging.warning(
-                'YOLOX-PAI only supports the export type as  [raw,jit,blade], otherwise we use raw as default'
+                'YOLOX-PAI only supports the export type as  [raw,jit,blade,onnx], otherwise we use raw as default'
             )
             export_type = 'raw'
 
@@ -276,7 +276,7 @@ def _export_yolox(model, cfg, filename):
                 len(img_scale) == 2
             ), 'Export YoloX predictor config contains img_scale must be (int, int) tuple!'
 
-            input = 255 * torch.rand((batch_size, 3) + img_scale)
+            input = 255 * torch.rand((batch_size, 3) + tuple(img_scale))
 
             # assert use_trt_efficientnms only happens when static_opt=True
             if static_opt is not True:
@@ -355,6 +355,31 @@ def _export_yolox(model, cfg, filename):
 
                     json.dump(config, ofile)
 
+            if export_type == 'onnx':
+
+                with io.open(
+                        filename + '.config.json' if filename.endswith('onnx')
+                        else filename + '.onnx.config.json', 'w') as ofile:
+                    config = dict(
+                        model=cfg.model,
+                        export=cfg.export,
+                        test_pipeline=cfg.test_pipeline,
+                        classes=cfg.CLASSES)
+
+                    json.dump(config, ofile)
+
+                torch.onnx.export(
+                    model,
+                    input.to(device),
+                    filename if filename.endswith('onnx') else filename +
+                    '.onnx',
+                    export_params=True,
+                    opset_version=12,
+                    do_constant_folding=True,
+                    input_names=['input'],
+                    output_names=['output'],
+                )
+
             if export_type == 'jit':
                 with io.open(filename + '.jit', 'wb') as ofile:
                     torch.jit.save(yolox_trace, ofile)
diff --git a/easycv/predictors/detector.py b/easycv/predictors/detector.py
index 35d62e22..ed7dd908 100644
--- a/easycv/predictors/detector.py
+++ b/easycv/predictors/detector.py
@@ -23,6 +23,12 @@
     from .interface import PredictorInterface
 
 
+# Convert a tensor to a numpy ndarray.
+def onnx_to_numpy(tensor):
+    return tensor.detach().cpu().numpy(
+    ) if tensor.requires_grad else tensor.cpu().numpy()
+
+
 class DetInputProcessor(InputProcessor):
 
     def build_processor(self):
@@ -349,9 +355,11 @@ def __init__(self,
                 self.model_type = 'jit'
             elif model_path.endswith('blade'):
                 self.model_type = 'blade'
+            elif model_path.endswith('onnx'):
+                self.model_type = 'onnx'
             else:
                 self.model_type = 'raw'
-        assert self.model_type in ['raw', 'jit', 'blade']
+        assert self.model_type in ['raw', 'jit', 'blade', 'onnx']
 
         if self.model_type == 'blade' or self.use_trt_efficientnms:
             import torch_blade
@@ -381,8 +389,16 @@ def __init__(self,
 
     def _build_model(self):
         if self.model_type != 'raw':
-            with io.open(self.model_path, 'rb') as infile:
-                model = torch.jit.load(infile, self.device)
+            if self.model_type != 'onnx':
+                with io.open(self.model_path, 'rb') as infile:
+                    model = torch.jit.load(infile, self.device)
+            else:
+                import onnxruntime
+                if onnxruntime.get_device() == 'GPU':
+                    model = onnxruntime.InferenceSession(
+                        self.model_path, providers=['CUDAExecutionProvider'])
+                else:
+                    model = onnxruntime.InferenceSession(self.model_path)
         else:
             from easycv.utils.misc import reparameterize_models
             model = super()._build_model()
@@ -394,8 +410,9 @@ def prepare_model(self):
         If the model is not loaded from a configuration file, e.g. torch jit model, you need to reimplement it.
         """
         model = self._build_model()
-        model.to(self.device)
-        model.eval()
+        if self.model_type != 'onnx':
+            model.to(self.device)
+            model.eval()
         if self.model_type == 'raw':
             load_checkpoint(model, self.model_path, map_location='cpu')
         return model
@@ -406,7 +423,15 @@ def model_forward(self, inputs):
         """
         if self.model_type != 'raw':
             with torch.no_grad():
-                outputs = self.model(inputs['img'])
+                if self.model_type != 'onnx':
+                    outputs = self.model(inputs['img'])
+                else:
+                    outputs = self.model.run(
+                        None, {
+                            self.model.get_inputs()[0].name:
+                            onnx_to_numpy(inputs['img'])
+                        })[0]
+                    outputs = torch.from_numpy(outputs)
                 outputs = {'results': outputs}  # convert to dict format
         else:
             outputs = super().model_forward(inputs)
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index edff212e..2aa63ac3 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -13,6 +13,7 @@ lmdb
 numba
 numpy
 nuscenes-devkit
+onnxruntime
 opencv-python
 oss2
 packaging
diff --git a/tests/test_tools/test_predict.py b/tests/test_tools/test_predict.py
index 74c163fd..69f9a3e9 100644
--- a/tests/test_tools/test_predict.py
+++ b/tests/test_tools/test_predict.py
@@ -83,12 +83,12 @@ def test_predict_oss_path(self):
         oss_config = get_oss_config()
         ak_id = oss_config['ak_id']
         ak_secret = oss_config['ak_secret']
-        hosts = oss_config['hosts'] + ['oss-cn-hangzhou.aliyuncs.com']
+        hosts = oss_config['hosts']
         hosts = ','.join(_ for _ in hosts)
-        buckets = oss_config['buckets'] + ['easycv']
+        buckets = oss_config['buckets']
         buckets = ','.join(_ for _ in buckets)
 
-        input_file = 'oss://easycv/data/small_test_data/test_images/http_image_list.txt'
+        input_file = 'oss://pai-vision-data-hz/unittest/local_backup/easycv_nfs/data/test_images/http_image_list.txt'
         output_file = tempfile.NamedTemporaryFile('w').name
         cmd = f'PYTHONPATH=. python tools/predict.py \
                     --input_file {input_file} \