Slc/add SyncBN ops and modify ci #221

Merged · 40 commits · Aug 17, 2023
Commits
296e675
add torch.module.to
wey-code Jul 21, 2023
695dc4d
Merge branch 'main' of https://github.com/DeepLink-org/dipu into main
wey-code Jul 25, 2023
a83575d
Merge branch 'main' of https://github.com/DeepLink-org/dipu into main
wey-code Jul 26, 2023
b025b6a
add bn stats
wey-code Jul 26, 2023
3ff3e90
add SyncBN ops
wey-code Aug 3, 2023
5110a46
update DIOPI
wey-code Aug 3, 2023
92ab438
update test case comment
wey-code Aug 4, 2023
e4f8c49
update
wey-code Aug 4, 2023
c13c2ea
add bn elemt
wey-code Aug 5, 2023
685f067
update DIOPI
wey-code Aug 5, 2023
9c2e8a9
modify ci cuda models
wey-code Aug 5, 2023
cba4ee2
add td-hm_hrnet
wey-code Aug 5, 2023
b001fcd
fix bug
wey-code Aug 5, 2023
4e11bf8
add shuffle_net
wey-code Aug 5, 2023
837d773
update kinetics400
wey-code Aug 5, 2023
9b4d1a1
test DI-engine
wey-code Aug 7, 2023
97aea06
fix bug
wey-code Aug 7, 2023
6d94c5b
update DIOPI
wey-code Aug 7, 2023
01cfd52
install package
wey-code Aug 7, 2023
878faa3
test all models
wey-code Aug 7, 2023
18ce86b
add exit(1)
wey-code Aug 7, 2023
77004e6
update
wey-code Aug 7, 2023
3db201d
add set -e
wey-code Aug 7, 2023
aebfeb4
Merge branch 'slc/add_bn_stats' of https://github.com/DeepLink-org/di…
wey-code Aug 7, 2023
e11a56c
lint
wey-code Aug 7, 2023
ecf6961
test stable diffusion
wey-code Aug 8, 2023
5436009
install mmagic
wey-code Aug 8, 2023
adee87e
Merge branch 'main' of https://github.com/DeepLink-org/dipu into slc/…
wey-code Aug 8, 2023
24dba8e
simplify test case
wey-code Aug 8, 2023
6a31820
pip install transformer
wey-code Aug 8, 2023
c775cab
pip install accelerate
wey-code Aug 8, 2023
74d8b7b
test all cuda models
wey-code Aug 9, 2023
5d5a0a8
test cuda all models
wey-code Aug 11, 2023
39ab000
Merge branch 'slc/add_bn_stats' of https://github.com/DeepLink-org/di…
wey-code Aug 16, 2023
5df4665
fix lint
wey-code Aug 16, 2023
eca54dd
lint
wey-code Aug 16, 2023
c21ab4a
Update ci_one_iter.sh
wey-code Aug 17, 2023
d88a920
Merge pull request #235 from DeepLink-org/slc/modify_cuda_ci
wey-code Aug 17, 2023
dd3c372
update DIOPI
wey-code Aug 17, 2023
a38696c
Merge branch 'slc/add_bn_stats' of https://github.com/DeepLink-org/di…
wey-code Aug 17, 2023
1 change: 1 addition & 0 deletions .github/workflows/main.yml
@@ -33,6 +33,7 @@ jobs:
steps:
- name: clone repo
run: |
set -e
cd ${GITHUB_WORKSPACE} && rm -rf DIPU DIPU_DIOPI && git clone https://github.com/DeepLink-org/DIPU.git && cd DIPU
if [ $GITHUB_EVENT_NAME == "pull_request" ]; then
echo "${{ github.base_ref }} "
43 changes: 43 additions & 0 deletions scripts/autogen_diopi_wrapper/diopi_functions.yaml
@@ -1716,3 +1716,46 @@
custom_code_at_the_beginning: |
::diopiSize_t dimDiopiSize = toDiopiSize(dim);
interface: diopiAmax(ctx, out, self, dimDiopiSize, keepdim)

- schema: batch_norm_stats(Tensor input, float eps) -> (Tensor, Tensor)
custom_code_at_the_beginning: |
auto shape = input.size(1);
auto out0 = at::empty({shape}, input.options().dtype(at::kFloat));
auto out1 = at::empty({shape}, input.options().dtype(at::kFloat));
interface: diopiBatchNormStats(ctx, out0, out1, input, eps)

- schema: batch_norm_gather_stats_with_counts(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, Tensor counts) -> (Tensor, Tensor)
custom_code_at_the_beginning: |
auto shape = input.size(1);
auto out0 = at::empty({shape}, input.options().dtype(at::kFloat));
auto out1 = at::empty({shape}, input.options().dtype(at::kFloat));
interface: diopiBatchNormGatherStatsWithCounts(ctx, out0, out1, input, mean, invstd, const_cast<diopiTensorHandle_t>(running_mean), const_cast<diopiTensorHandle_t>(running_var), momentum, eps, counts)

- schema: batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor)
custom_code_at_the_beginning: |
auto shape = input.size(1);
at::Tensor out0;
at::Tensor out1;
at::Tensor out2;
at::Tensor out3;
if(input_g){
out0 = at::empty({shape}, input.options().dtype(at::kFloat));
out1 = at::empty({shape}, input.options().dtype(at::kFloat));
}
if(weight_g){
out2 = at::empty({shape}, input.options().dtype(at::kFloat));
}
if(bias_g){
out3 = at::empty({shape}, input.options().dtype(at::kFloat));
}
interface: diopiBatchNormBackwardReduce(ctx, out0, out1, out2, out3, grad_out, input, mean, invstd, weight, input_g, weight_g, bias_g)

- schema: batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count) -> Tensor
custom_code_at_the_beginning: |
auto out = at::empty_like(grad_out);
interface: diopiBatchNormBackwardElemt(ctx, out, grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu, count);

- schema: batch_norm_elemt(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps) -> Tensor
custom_code_at_the_beginning: |
auto out = at::empty_like(input);
interface: diopiBatchNormElemt(ctx, out, input, weight, bias, mean, invstd, eps);
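The five schemas above are the ATen kernels that torch.nn.SyncBatchNorm decomposes into. As a quick illustration (not part of this diff), the forward-path ops can be exercised through the public torch.* interface; this is a minimal sketch assuming a CUDA/DIPU device on which the corresponding DIOPI kernels are registered, and the torch_dipu import is assumed to be the usual DIPU entry point:

# Minimal sketch: exercise the newly wrapped SyncBN forward ops.
# Assumes a device backed by the DIOPI implementations added in this PR.
import torch
import torch_dipu  # noqa: F401  (assumed DIPU entry point that registers the device backend)

x = torch.randn(4, 8, 16, 16, device="cuda")   # NCHW input, C = 8

# per-channel mean and inverse standard deviation -> diopiBatchNormStats
mean, invstd = torch.batch_norm_stats(x, 1e-5)

# normalize with the computed statistics (weight/bias omitted) -> diopiBatchNormElemt
out = torch.batch_norm_elemt(x, None, None, mean, invstd, 1e-5)

print(mean.shape, invstd.shape, out.shape)     # [8], [8], [4, 8, 16, 16]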
23 changes: 18 additions & 5 deletions scripts/ci/ci_one_iter.sh
@@ -1,6 +1,7 @@
#!/bin/bash

function clone_needed_repo() {
set -e
# clone some repositories

#define some version
@@ -16,9 +17,10 @@ function clone_needed_repo() {
MMAGIC=dipu_v1.0.0_one_iter_tool
SMART_VERSION=dev_for_mmcv2.0
MMYOLO=dipu_v0.5.0_one_iter_tool
DIENGINE=dipu_v0.4.8_one_iter_tool


rm -rf SMART && git clone -b ${SMART_VERSION} https://github.com/ParrotsDL/SMART.git
rm -rf DI-engine && git clone -b ${DIENGINE} https://github.com/DeepLink-org/DI-engine.git
rm -rf SMART && git clone -b ${SMART_VERSION} https://github.com/DeepLink-org/SMART.git
rm -rf mmpretrain && git clone -b ${MMPRETRAIN_VERSION} https://github.com/DeepLink-org/mmpretrain.git
rm -rf mmdetection && git clone -b ${MMDETECTION_VERSION} https://github.com/DeepLink-org/mmdetection.git
rm -rf mmsegmentation && git clone -b ${MMSEGMENTATION_VERSION} https://github.com/DeepLink-org/mmsegmentation.git
@@ -36,19 +38,28 @@ function clone_needed_repo() {
function build_needed_repo_cuda() {
cd mmcv
MMCV_WITH_DIOPI=1 MMCV_WITH_OPS=1 python setup.py build_ext -i
cd ..
# cd ../mmdet
# pip install -e . --no-deps
# cd ../mmyolo
# # Install albumentations
# pip install -r requirements/albu.txt --no-deps
# # Install MMYOLO
# pip install -e . --no-deps
cd ..
# cd mmagic
# pip install -e . -v --no-deps
# pip install -e . -v
# cd ../mmpretrain
# pip install -e .
# cd ..
# cd DI-engine
# pip install -e .
# cd ..
# # Install packages needed for reinforcement learning
# pip install lz4
# pip install readerwriterlock
# pip install Flask==2.1.0
# pip install transformers
# pip install accelerate
}

function build_needed_repo_camb() {
@@ -66,6 +77,7 @@ function export_repo_pythonpath(){
export PYTHONPATH=${basic_path}/mmagic:$PYTHONPATH
export PYTHONPATH=${basic_path}/data/stable-diffusion-v1-5:$PYTHONPATH
export PYTHONPATH=${basic_path}/mmagic/mmagic/models/editors/stable_diffusion:$PYTHONPATH
export PYTHONPATH=${basic_path}/DI-engine:$PYTHONPATH
elif [ "$1" = "camb" ]; then
echo "Executing CAMB operation in pythonpath..."
export PYTHONPATH=/mnt/lustre/share/platform/env/miniconda3.8/envs/pt2.0_diopi/mmcvs/9b1209f:$PYTHONPATH
@@ -97,11 +109,12 @@ function build_dataset(){
ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/imagenet data/imagenet
ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/coco data/coco
ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/cityscapes data/cityscapes
# ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/kinetics400 data/kinetics400 #dataset is still being migrated
ln -s /mnt/lustre/share_data/openmmlab/datasets/action/Kinetics400 data/kinetics400
ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/icdar2015 data/icdar2015
ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/mjsynth data/mjsynth
ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/kitti data/kitti
ln -s /mnt/lustre/share_data/shenliancheng/swin_large_patch4_window12_384_22k.pth data/swin_large_patch4_window12_384_22k.pth
ln -s /mnt/lustre/share_data/parrots.tester.s.03/models_code/mmagic/stable-diffusion-v1-5 data/stable-diffusion-v1-5

elif [ "$1" = "camb" ]; then
echo "Executing CAMB operation in build dataset..."
32 changes: 24 additions & 8 deletions scripts/ci/ci_run_one_iter.py
@@ -20,7 +20,7 @@
error_flag = multiprocessing.Value('i',0) #if encount error

if device_type == 'cuda':
random_model_num = 8
random_model_num = 100
print("we use cuda!")
else:
random_model_num = 100
@@ -57,10 +57,22 @@ def process_one_iter(model_info):
p3 = model_info_list[2]
p4 = model_info_list[3] if len(model_info_list) == 4 else ""

train_path = p1 + "/tools/train.py"
config_path = p1 + "/configs/" + p2
work_dir = "--work-dir=./one_iter_data/" + p3
opt_arg = p4
if("mm" in p1):
train_path = p1 + "/tools/train.py"
config_path = p1 + "/configs/" + p2
work_dir = "--work-dir=./one_iter_data/" + p3
opt_arg = p4
package_name = "mmlab"
elif("DI" in p1):
train_path = p1+"/"+p2
config_path = ""
work_dir = ""
opt_arg = ""
package_name = "diengine"
else:
print("Wrong model info in {}".format(model_info), flush = True)
exit(1)

os.environ['ONE_ITER_TOOL_STORAGE_PATH'] = os.getcwd()+"/one_iter_data/" + p3

storage_path = os.environ['ONE_ITER_TOOL_STORAGE_PATH']
@@ -93,11 +105,15 @@ def process_one_iter(model_info):
github_job_name = github_job # use the same job name so that all jobs can be scancel-ed at once

if device_type == 'cuda':
cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg)
cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh".format(github_job_name, slurm_par, gpu_requests)
if(p2 == "stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py"):
cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=40 sh mmagic/configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_one_iter.sh".format(github_job_name, slurm_par, gpu_requests)
cmd_cp_one_iter = ""
else:
cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg)
cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {}".format(github_job_name, slurm_par, gpu_requests, package_name)
else:
cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg)
cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh".format(github_job_name, slurm_par, gpu_requests)
cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {}".format(github_job_name, slurm_par, gpu_requests, package_name)

run_cmd(cmd_run_one_iter)
run_cmd(cmd_cp_one_iter)
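The branch added above distinguishes the two kinds of entries in test_one_iter_model_list.yaml: repositories whose name contains "mm" run tools/train.py with a config and work dir, while DI-engine entries execute the listed example script directly. Read in isolation, the dispatch amounts to the following sketch (the helper function is hypothetical and only restates the logic of the diff):

# Hypothetical standalone restatement of the model_info dispatch above.
def parse_model_cfg(model_cfg: str) -> dict:
    parts = model_cfg.split()
    repo, cfg, workdir = parts[0], parts[1], parts[2]
    opt_arg = parts[3] if len(parts) == 4 else ""
    if "mm" in repo:        # OpenMMLab repos: train.py + config + work dir
        return {"train_path": f"{repo}/tools/train.py",
                "config_path": f"{repo}/configs/{cfg}",
                "work_dir": f"--work-dir=./one_iter_data/{workdir}",
                "opt_arg": opt_arg,
                "package_name": "mmlab"}
    if "DI" in repo:        # DI-engine: the second field is the script to run
        return {"train_path": f"{repo}/{cfg}",
                "config_path": "", "work_dir": "", "opt_arg": "",
                "package_name": "diengine"}
    raise ValueError(f"Wrong model info in {model_cfg}")

parse_model_cfg("DI-engine ding/example/ppo.py workdirs_ppo")  # -> package_name "diengine"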
1 change: 1 addition & 0 deletions scripts/ci/nv/ci_nv_run_one_iter.py
@@ -110,6 +110,7 @@ def handle_error(error):
print("Kill all!", flush = True)
p.terminate()
error_flag.value = 1
exit(1)

if __name__=='__main__':
curPath = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
33 changes: 20 additions & 13 deletions scripts/ci/test_one_iter_model_list.yaml
@@ -50,30 +50,37 @@ cuda:
- model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px"
- model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k"
- model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k"
- model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k"
- model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k"
- model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k"
fallback_op_list: "native_batch_norm*"
# mmdetection
- model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco"
- model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco"
- model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco"
- model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco"
- model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco"
- model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco"
- model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco"
- model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco"
- model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco"
# mmpose
- model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192"
# mmaction2
# - "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还在迁移
- model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb"
# mmocr
- model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj"
- model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" #smart工具对比 cpu存全量时问题消失
- model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20"
# mmsegmentation
- model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024"
- model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024"
# # timeout
- model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" #时间过长 爆显存
- model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024"
- model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024"
# mmyolo
- model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco"
- model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" #时间过长
- model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" #时间过长
- model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco"
- model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" #装的库还有问题
- model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco"
# - "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少
- model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k"
- model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" #精度问题
# mmdetection3d
- model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class"
# DI-engine
- model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
- model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
# mmagic
- model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet"