From 9c2e8a937b342b693dd94804b0936534829fc265 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Sat, 5 Aug 2023 20:29:19 +0800 Subject: [PATCH 01/19] modify ci cuda models --- scripts/ci/ci_one_iter.sh | 2 +- scripts/ci/ci_run_one_iter.py | 2 +- scripts/ci/test_one_iter_model_list.yaml | 24 +++++++++++++----------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index b1fdda794..e8dcb7a28 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -97,7 +97,7 @@ function build_dataset(){ ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/imagenet data/imagenet ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/coco data/coco ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/cityscapes data/cityscapes - # ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/kinetics400 data/kinetics400 #数据集还在迁移 + ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/mmaction2/Kinetics400 data/kinetics400 ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/icdar2015 data/icdar2015 ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/mjsynth data/mjsynth ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/kitti data/kitti diff --git a/scripts/ci/ci_run_one_iter.py b/scripts/ci/ci_run_one_iter.py index 27c80543b..b70a8d430 100644 --- a/scripts/ci/ci_run_one_iter.py +++ b/scripts/ci/ci_run_one_iter.py @@ -20,7 +20,7 @@ error_flag = multiprocessing.Value('i',0) #if encount error if device_type == 'cuda': - random_model_num = 8 + random_model_num = 100 print("we use cuda!") else: random_model_num = 100 diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 007f52f50..9532e516f 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -38,30 +38,32 @@ cuda: - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" + - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" + - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" # mmdetection - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" - - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" + - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" + - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" + - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" + - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" # mmaction2 # - "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还在迁移 # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" #smart工具对比 cpu存全量时问题消失 + - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" # mmsegmentation - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" - # # 超时 - - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" #时间过长 爆显存 + - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" + - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" + # mmyolo - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" - - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" #时间过长 - - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" #时间过长 - - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" - - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" #装的库还有问题 - - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" + # mmdetection3d + - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" + # mmagic # - "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 - - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" - - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" #精度问题 From cba4ee2d8441afd3eb48f165097b78d525b2337e Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Sat, 5 Aug 2023 20:31:08 +0800 Subject: [PATCH 02/19] add td-hm_hrnet --- scripts/ci/test_one_iter_model_list.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 9532e516f..6a26fe794 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -52,7 +52,7 @@ cuda: # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" # mmaction2 - # - "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还在迁移 + - "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" From b001fcda32fc77784753fd9f020211f09b768b55 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Sat, 5 Aug 2023 21:24:43 +0800 Subject: [PATCH 03/19] fix bug --- scripts/ci/test_one_iter_model_list.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 6a26fe794..73fa8b4a9 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -52,7 +52,7 @@ cuda: # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" # mmaction2 - - "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" + - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" @@ -66,4 +66,4 @@ cuda: # mmdetection3d - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" # mmagic - # - "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 + # - model_cfg: "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 From 4e11bf843e970dc900a54fda6a3389c4b9470b37 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Sat, 5 Aug 2023 21:53:51 +0800 Subject: [PATCH 04/19] add shuffle_net --- scripts/ci/test_one_iter_model_list.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 73fa8b4a9..241856ec1 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -40,6 +40,8 @@ cuda: - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" + - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" + fallback_op_list: "native_batch_norm*" # mmdetection - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" From 837d77305ea411f06eca821579d74f2d903c3b5b Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Sat, 5 Aug 2023 22:06:37 +0800 Subject: [PATCH 05/19] update kinetics400 --- scripts/ci/ci_one_iter.sh | 2 +- scripts/ci/test_one_iter_model_list.yaml | 2 +- third_party/DIOPI | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index e8dcb7a28..00d8e7306 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -97,7 +97,7 @@ function build_dataset(){ ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/imagenet data/imagenet ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/coco data/coco ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/cityscapes data/cityscapes - ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/mmaction2/Kinetics400 data/kinetics400 + # ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/mmaction2/Kinetics400 data/kinetics400 ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/icdar2015 data/icdar2015 ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/mjsynth data/mjsynth ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/kitti data/kitti diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 241856ec1..b1316a718 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -54,7 +54,7 @@ cuda: # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" # mmaction2 - - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" + # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" diff --git a/third_party/DIOPI b/third_party/DIOPI index 672399c32..8e468eb25 160000 --- a/third_party/DIOPI +++ b/third_party/DIOPI @@ -1 +1 @@ -Subproject commit 672399c32758453071bd567fdc232420108c066c +Subproject commit 8e468eb2587af6f964f812c9f08f488768c4fb36 From 9b4d1a136c43886eb76f77a48c45724eaada1829 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Mon, 7 Aug 2023 10:57:08 +0800 Subject: [PATCH 06/19] test DI-engine --- scripts/ci/ci_one_iter.sh | 11 +++- scripts/ci/ci_run_one_iter.py | 24 ++++++-- scripts/ci/test_one_iter_model_list.yaml | 77 ++++++++++++------------ 3 files changed, 67 insertions(+), 45 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 00d8e7306..07c96134d 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -14,10 +14,11 @@ function clone_needed_repo() { MMACTION2_VERSION=dipu_v1.0.0_one_iter_tool MMOCR_VERSION=dipu_v1.0.0_one_iter_tool MMAGIC=dipu_v1.0.0_one_iter_tool - SMART_VERSION=dev_for_mmcv2.0 + SMART_VERSION=slc/support-eiengine-trans MMYOLO=dipu_v0.5.0_one_iter_tool + DIENGINE=dipu_v0.4.8_one_iter_tool - + rm -rf DI-engine && git clone -b ${DIENGINE} https://github.com/DeepLink-org/DI-engine.git rm -rf SMART && git clone -b ${SMART_VERSION} https://github.com/ParrotsDL/SMART.git rm -rf mmpretrain && git clone -b ${MMPRETRAIN_VERSION} https://github.com/DeepLink-org/mmpretrain.git rm -rf mmdetection && git clone -b ${MMDETECTION_VERSION} https://github.com/DeepLink-org/mmdetection.git @@ -49,6 +50,11 @@ function build_needed_repo_cuda() { # cd ../mmpretrain # pip install -e . # cd .. + + #安装强化学习需要用的包 + pip install lz4 + pip install readerwriterlock + pip install Flask==2.1.0 } function build_needed_repo_camb() { @@ -66,6 +72,7 @@ function export_repo_pythonpath(){ export PYTHONPATH=${basic_path}/mmagic:$PYTHONPATH export PYTHONPATH=${basic_path}/data/stable-diffusion-v1-5:$PYTHONPATH export PYTHONPATH=${basic_path}/mmagic/mmagic/models/editors/stable_diffusion:$PYTHONPATH + export PYTHONPATH=${basic_path}/DI-engine:$PYTHONPATH elif [ "$1" = "camb" ]; then echo "Executing CAMB operation in pythonpath..." export PYTHONPATH=/mnt/lustre/share/platform/env/miniconda3.8/envs/pt2.0_diopi/mmcvs/9b1209f:$PYTHONPATH diff --git a/scripts/ci/ci_run_one_iter.py b/scripts/ci/ci_run_one_iter.py index b70a8d430..a6e5b9c05 100644 --- a/scripts/ci/ci_run_one_iter.py +++ b/scripts/ci/ci_run_one_iter.py @@ -57,10 +57,22 @@ def process_one_iter(model_info): p3 = model_info_list[2] p4 = model_info_list[3] if len(model_info_list) == 4 else "" - train_path = p1 + "/tools/train.py" - config_path = p1 + "/configs/" + p2 - work_dir = "--work-dir=./one_iter_data/" + p3 - opt_arg = p4 + if("mm" in p1): + train_path = p1 + "/tools/train.py" + config_path = p1 + "/configs/" + p2 + work_dir = "--work-dir=./one_iter_data/" + p3 + opt_arg = p4 + package_name = "mmlab" + elif("DI" in p1): + train_path = p1/p2 + config_path = "" + work_dir = "" + opt_arg = "" + package_name = "diengine" + else: + print("Wrong model info in {}".format(model_info), flush = True) + exit(1) + os.environ['ONE_ITER_TOOL_STORAGE_PATH'] = os.getcwd()+"/one_iter_data/" + p3 storage_path = os.environ['ONE_ITER_TOOL_STORAGE_PATH'] @@ -94,10 +106,10 @@ def process_one_iter(model_info): if device_type == 'cuda': cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg) - cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh".format(github_job_name, slurm_par, gpu_requests) + cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {}".format(github_job_name, slurm_par, gpu_requests, package_name) else: cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg) - cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh".format(github_job_name, slurm_par, gpu_requests) + cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {}".format(github_job_name, slurm_par, gpu_requests, package_name) run_cmd(cmd_run_one_iter) run_cmd(cmd_cp_one_iter) diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index b1316a718..43f26a6c0 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -32,40 +32,43 @@ camb: cuda: - # # mmpretrain - - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" - - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" - - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" - - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" - - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" - - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" - - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" - - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" - fallback_op_list: "native_batch_norm*" - # mmdetection - - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" - - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" - - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" - - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" - - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" - - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" - - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" - - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" - # mmpose - - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" - # mmaction2 - # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 - # mmocr - - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" - # mmsegmentation - - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" - - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" - - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" - - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" - # mmyolo - - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" - # mmdetection3d - - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" - # mmagic - # - model_cfg: "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 + # # # mmpretrain + # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" + # - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" + # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" + # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" + # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" + # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" + # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" + # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" + # fallback_op_list: "native_batch_norm*" + # # mmdetection + # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" + # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" + # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" + # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" + # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" + # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" + # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" + # - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" + # # mmpose + # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" + # # mmaction2 + # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 + # # mmocr + # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" + # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" + # # mmsegmentation + # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" + # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" + # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" + # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" + # # mmyolo + # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" + # # mmdetection3d + # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" + # # mmagic + # # - model_cfg: "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 + # DI-engine + - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" + - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" From 97aea06fdfc5f62ab0dfe4fb5f591d21e98b4f22 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Mon, 7 Aug 2023 11:12:07 +0800 Subject: [PATCH 07/19] fix bug --- scripts/ci/ci_run_one_iter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/ci_run_one_iter.py b/scripts/ci/ci_run_one_iter.py index a6e5b9c05..2ae369817 100644 --- a/scripts/ci/ci_run_one_iter.py +++ b/scripts/ci/ci_run_one_iter.py @@ -64,7 +64,7 @@ def process_one_iter(model_info): opt_arg = p4 package_name = "mmlab" elif("DI" in p1): - train_path = p1/p2 + train_path = p1+"/"+p2 config_path = "" work_dir = "" opt_arg = "" From 6d94c5bad4abd368586bddb33874ccfd9e4e2b63 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Mon, 7 Aug 2023 11:27:43 +0800 Subject: [PATCH 08/19] update DIOPI --- third_party/DIOPI | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/DIOPI b/third_party/DIOPI index 8e468eb25..3bc23f302 160000 --- a/third_party/DIOPI +++ b/third_party/DIOPI @@ -1 +1 @@ -Subproject commit 8e468eb2587af6f964f812c9f08f488768c4fb36 +Subproject commit 3bc23f30224ada25e177d8834f83e5129c362a89 From 01cfd5298535bddcb6f8bd8228c6b33d9af11099 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Mon, 7 Aug 2023 11:30:21 +0800 Subject: [PATCH 09/19] install package --- scripts/ci/ci_one_iter.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 07c96134d..c7715f664 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -50,7 +50,9 @@ function build_needed_repo_cuda() { # cd ../mmpretrain # pip install -e . # cd .. - + cd DI-engine + pip install -e . + cd .. #安装强化学习需要用的包 pip install lz4 pip install readerwriterlock From 878faa303c482fcb19dab8ac6de8a206b671ce4e Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Mon, 7 Aug 2023 13:24:12 +0800 Subject: [PATCH 10/19] test all models --- scripts/ci/ci_one_iter.sh | 14 ++--- scripts/ci/test_one_iter_model_list.yaml | 74 ++++++++++++------------ 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index c7715f664..fa22f246f 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -50,13 +50,13 @@ function build_needed_repo_cuda() { # cd ../mmpretrain # pip install -e . # cd .. - cd DI-engine - pip install -e . - cd .. - #安装强化学习需要用的包 - pip install lz4 - pip install readerwriterlock - pip install Flask==2.1.0 + # cd DI-engine + # pip install -e . + # cd .. + # #安装强化学习需要用的包 + # pip install lz4 + # pip install readerwriterlock + # pip install Flask==2.1.0 } function build_needed_repo_camb() { diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 43f26a6c0..189432506 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -32,43 +32,43 @@ camb: cuda: - # # # mmpretrain - # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" - # - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" - # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" - # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" - # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" - # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" - # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" - # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" - # fallback_op_list: "native_batch_norm*" - # # mmdetection - # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" - # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" - # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" - # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" - # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" - # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" - # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" - # - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" - # # mmpose - # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" - # # mmaction2 - # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 - # # mmocr - # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" - # # mmsegmentation - # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" - # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" - # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" - # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" - # # mmyolo - # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" - # # mmdetection3d - # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" - # # mmagic - # # - model_cfg: "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 + # # mmpretrain + - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" + - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" + - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" + - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" + - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" + - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" + - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" + - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" + fallback_op_list: "native_batch_norm*" + # mmdetection + - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" + - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" + - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" + - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" + - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" + - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" + - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" + - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" + # mmpose + - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" + # mmaction2 + # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 + # mmocr + - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" + - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" + # mmsegmentation + - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" + - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" + - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" + - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" + # mmyolo + - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" + # mmdetection3d + - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" + # mmagic + # - model_cfg: "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 # DI-engine - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" From 18ce86b40661bf01ef6b94ec0b22187dcbe85f76 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Mon, 7 Aug 2023 14:40:57 +0800 Subject: [PATCH 11/19] add exit(1) --- scripts/ci/nv/ci_nv_run_one_iter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/ci/nv/ci_nv_run_one_iter.py b/scripts/ci/nv/ci_nv_run_one_iter.py index 446a67287..c40241f88 100644 --- a/scripts/ci/nv/ci_nv_run_one_iter.py +++ b/scripts/ci/nv/ci_nv_run_one_iter.py @@ -110,6 +110,7 @@ def handle_error(error): print("Kill all!", flush = True) p.terminate() error_flag.value = 1 + exit(1) if __name__=='__main__': curPath = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) From e11a56c2616afb217b11abe5cde39f87e80dd744 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Mon, 7 Aug 2023 15:23:38 +0800 Subject: [PATCH 12/19] lint --- scripts/ci/ci_one_iter.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 267eafccf..cfda69dce 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -1,7 +1,6 @@ #!/bin/bash function clone_needed_repo() { - set -e # clone some repositories From ecf6961e8ebae8cead0ebcc8dbd7509289d267b2 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Tue, 8 Aug 2023 14:54:23 +0800 Subject: [PATCH 13/19] test stable diffusion --- scripts/ci/ci_one_iter.sh | 1 + scripts/ci/ci_run_one_iter.py | 8 ++- scripts/ci/test_one_iter_model_list.yaml | 78 ++++++++++++------------ 3 files changed, 46 insertions(+), 41 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index cfda69dce..38f3ea6ad 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -112,6 +112,7 @@ function build_dataset(){ ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/mjsynth data/mjsynth ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/kitti data/kitti ln -s /mnt/lustre/share_data/shenliancheng/swin_large_patch4_window12_384_22k.pth data/swin_large_patch4_window12_384_22k.pth + ln -s /mnt/lustre/share_data/parrots.tester.s.03/models_code/mmagic/stable-diffusion-v1-5 data/stable-diffusion-v1-5 elif [ "$1" = "camb" ]; then echo "Executing CAMB operation in build dataset..." diff --git a/scripts/ci/ci_run_one_iter.py b/scripts/ci/ci_run_one_iter.py index 2ae369817..5cf4188e5 100644 --- a/scripts/ci/ci_run_one_iter.py +++ b/scripts/ci/ci_run_one_iter.py @@ -105,8 +105,12 @@ def process_one_iter(model_info): github_job_name = github_job #为了方便统一scancel,因此使用同样的jobname if device_type == 'cuda': - cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg) - cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {}".format(github_job_name, slurm_par, gpu_requests, package_name) + if(p2 == "stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py"): + cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=40 sh mmagic/configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_one_iter.sh".format(github_job_name, slurm_par, gpu_requests) + cmd_cp_one_iter = "" + else: + cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg) + cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --cpus-per-task=5 --mem=16G --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {}".format(github_job_name, slurm_par, gpu_requests, package_name) else: cmd_run_one_iter = "srun --job-name={} --partition={} --gres={} --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {} {} {} {}".format(github_job_name, slurm_par, gpu_requests, train_path, config_path, work_dir, opt_arg) cmd_cp_one_iter = "srun --job-name={} --partition={} --gres={} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {}".format(github_job_name, slurm_par, gpu_requests, package_name) diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 189432506..3970ca73c 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -32,43 +32,43 @@ camb: cuda: - # # mmpretrain - - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" - - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" - - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" - - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" - - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" - - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" - - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" - - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" - fallback_op_list: "native_batch_norm*" - # mmdetection - - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" - - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" - - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" - - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" - - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" - - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" - - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" - - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" - # mmpose - - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" - # mmaction2 - # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 - # mmocr - - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" - # mmsegmentation - - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" - - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" - - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" - - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" - # mmyolo - - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" - # mmdetection3d - - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" + # # # mmpretrain + # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" + # - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" + # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" + # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" + # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" + # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" + # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" + # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" + # fallback_op_list: "native_batch_norm*" + # # mmdetection + # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" + # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" + # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" + # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" + # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" + # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" + # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" + # - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" + # # mmpose + # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" + # # mmaction2 + # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 + # # mmocr + # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" + # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" + # # mmsegmentation + # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" + # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" + # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" + # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" + # # mmyolo + # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" + # # mmdetection3d + # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" + # # DI-engine + # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" + # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" # mmagic - # - model_cfg: "mmagic configs/stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet.py" #模型文件还缺少 - # DI-engine - - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" + - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet" #模型文件还缺少 From 54360092819b67091e83edc835b5659a0cf957a8 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Tue, 8 Aug 2023 15:40:17 +0800 Subject: [PATCH 14/19] install mmagic --- scripts/ci/ci_one_iter.sh | 6 +++--- third_party/DIOPI | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 38f3ea6ad..7c7e429ed 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -46,14 +46,14 @@ function build_needed_repo_cuda() { # # Install MMYOLO # pip install -e . --no-deps cd .. - # cd mmagic - # pip install -e . -v --no-deps + cd mmagic + pip install -e . -v # cd ../mmpretrain # pip install -e . # cd .. # cd DI-engine # pip install -e . - # cd .. + cd .. # #安装强化学习需要用的包 # pip install lz4 # pip install readerwriterlock diff --git a/third_party/DIOPI b/third_party/DIOPI index 3bc23f302..ca88b252f 160000 --- a/third_party/DIOPI +++ b/third_party/DIOPI @@ -1 +1 @@ -Subproject commit 3bc23f30224ada25e177d8834f83e5129c362a89 +Subproject commit ca88b252f82e43e5f41e11b558bfa6de0105329c From 6a318205f351ad479e0e50f44f711b4f686819b7 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Tue, 8 Aug 2023 16:37:57 +0800 Subject: [PATCH 15/19] pip install transformer --- scripts/ci/ci_one_iter.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 7c7e429ed..4603260b0 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -46,18 +46,19 @@ function build_needed_repo_cuda() { # # Install MMYOLO # pip install -e . --no-deps cd .. - cd mmagic - pip install -e . -v + # cd mmagic + # pip install -e . -v # cd ../mmpretrain # pip install -e . # cd .. # cd DI-engine # pip install -e . - cd .. + # cd .. # #安装强化学习需要用的包 # pip install lz4 # pip install readerwriterlock # pip install Flask==2.1.0 + pip install transformers } function build_needed_repo_camb() { From c775cab459472d44a035b73c136b1f49c3576be3 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Tue, 8 Aug 2023 17:28:15 +0800 Subject: [PATCH 16/19] pip install accelerate --- scripts/ci/ci_one_iter.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 4603260b0..7c05db056 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -59,6 +59,7 @@ function build_needed_repo_cuda() { # pip install readerwriterlock # pip install Flask==2.1.0 pip install transformers + pip install accelerate } function build_needed_repo_camb() { From 74d8b7b773063f1d447986c83c8a33d6eabfe0cb Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Wed, 9 Aug 2023 11:45:38 +0800 Subject: [PATCH 17/19] test all cuda models --- scripts/ci/test_one_iter_model_list.yaml | 76 ++++++++++++------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 3970ca73c..99218f7a6 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -32,43 +32,43 @@ camb: cuda: - # # # mmpretrain - # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" - # - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" - # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" - # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" - # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" - # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" - # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" - # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" - # fallback_op_list: "native_batch_norm*" - # # mmdetection - # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" - # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" - # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" - # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" - # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" - # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" - # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" - # - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" - # # mmpose - # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" - # # mmaction2 - # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 - # # mmocr - # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" - # # mmsegmentation - # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" - # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" - # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" - # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" - # # mmyolo - # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" - # # mmdetection3d - # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" - # # DI-engine - # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" + # # mmpretrain + - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet50_8xb32_in1k" + - model_cfg: "mmpretrain swin_transformer/swin-large_16xb64_in1k.py workdirs_swin-large_16xb64_in1k" + - model_cfg: "mmpretrain vision_transformer/vit-base-p16_64xb64_in1k-384px.py workdirs_vit-base-p16_64xb64_in1k-384px" + - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet-b2_8xb32_in1k" + - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenet-v3-large_8xb128_in1k" + - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenet-v2_8xb32_in1k" + - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext-small_32xb128_in1k" + - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py workdirs_shufflenet-v2-1x_16xb64_in1k" + fallback_op_list: "native_batch_norm*" + # mmdetection + - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr_r50_8xb2-150e_coco" + - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3_d53_8xb8-320-273e_coco" + - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd300_coco" + - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco" + - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet_r50_fpn_1x_coco" + - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask-rcnn_r50_fpn_1x_coco" + - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster-rcnn_r101_fpn_1x_coco" + - model_cfg: "mmdetection dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py workdirs_atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco" + # mmpose + - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" + # mmaction2 + # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 + # mmocr + - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" + - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" + # mmsegmentation + - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_r50-d8_4xb2-40k_cityscapes-512x1024" + - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024" + - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024" + - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet_r50-d8_4xb2-40k_cityscapes-512x1024" + # mmyolo + - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py wordir_yolov5_s-v61_syncbn_8xb16-300e_coco" + # mmdetection3d + - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class" + # DI-engine + - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" + - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" # mmagic - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet" #模型文件还缺少 From 5d5a0a84fae60f39353d1c9752022ef1cadb2770 Mon Sep 17 00:00:00 2001 From: slc <1060314685@qq.com> Date: Fri, 11 Aug 2023 11:12:22 +0800 Subject: [PATCH 18/19] test cuda all models --- scripts/ci/ci_one_iter.sh | 10 +++++----- scripts/ci/test_one_iter_model_list.yaml | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 7c05db056..8d7186f2a 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -20,7 +20,7 @@ function clone_needed_repo() { DIENGINE=dipu_v0.4.8_one_iter_tool rm -rf DI-engine && git clone -b ${DIENGINE} https://github.com/DeepLink-org/DI-engine.git - rm -rf SMART && git clone -b ${SMART_VERSION} https://github.com/ParrotsDL/SMART.git + rm -rf SMART && git clone -b ${SMART_VERSION} https://github.com/DeepLink-org/SMART.git rm -rf mmpretrain && git clone -b ${MMPRETRAIN_VERSION} https://github.com/DeepLink-org/mmpretrain.git rm -rf mmdetection && git clone -b ${MMDETECTION_VERSION} https://github.com/DeepLink-org/mmdetection.git rm -rf mmsegmentation && git clone -b ${MMSEGMENTATION_VERSION} https://github.com/DeepLink-org/mmsegmentation.git @@ -38,6 +38,7 @@ function clone_needed_repo() { function build_needed_repo_cuda() { cd mmcv MMCV_WITH_DIOPI=1 MMCV_WITH_OPS=1 python setup.py build_ext -i + cd .. # cd ../mmdet # pip install -e . --no-deps # cd ../mmyolo @@ -45,7 +46,6 @@ function build_needed_repo_cuda() { # pip install -r requirements/albu.txt --no-deps # # Install MMYOLO # pip install -e . --no-deps - cd .. # cd mmagic # pip install -e . -v # cd ../mmpretrain @@ -58,8 +58,8 @@ function build_needed_repo_cuda() { # pip install lz4 # pip install readerwriterlock # pip install Flask==2.1.0 - pip install transformers - pip install accelerate + # pip install transformers + # pip install accelerate } function build_needed_repo_camb() { @@ -109,7 +109,7 @@ function build_dataset(){ ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/imagenet data/imagenet ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/coco data/coco ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/cityscapes data/cityscapes - # ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/mmaction2/Kinetics400 data/kinetics400 + ln -s /mnt/lustre/share_data/openmmlab/datasets/action/Kinetics400 data/kinetics400 ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/icdar2015 data/icdar2015 ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/mjsynth data/mjsynth ln -s /mnt/lustre/share_data/parrots.tester.s.03/dataset/data_for_ln/kitti data/kitti diff --git a/scripts/ci/test_one_iter_model_list.yaml b/scripts/ci/test_one_iter_model_list.yaml index 99218f7a6..c92e49b8c 100644 --- a/scripts/ci/test_one_iter_model_list.yaml +++ b/scripts/ci/test_one_iter_model_list.yaml @@ -54,7 +54,7 @@ cuda: # mmpose - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192" # mmaction2 - # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" #数据集还没准备好 + - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb" # mmocr - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn_mini-vgg_5e_mj" - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet_resnet50-dcnv2_fpnc_1200e_icdar20" @@ -71,4 +71,4 @@ cuda: - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" # mmagic - - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet" #模型文件还缺少 + - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable-diffusion_ddim_denoisingunet" From c21ab4aab546dd18b957dac7002f279437edb074 Mon Sep 17 00:00:00 2001 From: lc shen <60810292+wey-code@users.noreply.github.com> Date: Thu, 17 Aug 2023 11:33:45 +0800 Subject: [PATCH 19/19] Update ci_one_iter.sh update SMART_VERSION --- scripts/ci/ci_one_iter.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/ci_one_iter.sh b/scripts/ci/ci_one_iter.sh index 8d7186f2a..cf5013608 100644 --- a/scripts/ci/ci_one_iter.sh +++ b/scripts/ci/ci_one_iter.sh @@ -15,7 +15,7 @@ function clone_needed_repo() { MMACTION2_VERSION=dipu_v1.0.0_one_iter_tool MMOCR_VERSION=dipu_v1.0.0_one_iter_tool MMAGIC=dipu_v1.0.0_one_iter_tool - SMART_VERSION=slc/support-eiengine-trans + SMART_VERSION=dev_for_mmcv2.0 MMYOLO=dipu_v0.5.0_one_iter_tool DIENGINE=dipu_v0.4.8_one_iter_tool