From 28ac80cb1a96c208a3e95f76f051ebafc74390aa Mon Sep 17 00:00:00 2001 From: Haodong Duan Date: Tue, 17 Aug 2021 20:00:24 +0800 Subject: [PATCH] [Fix] Works on Regression in July (#1081) * set videos_per_gpu to 1 for testing * replace top5_accuracy * update * fix * fix * workers_per_gpu -> 2 * fix bug * accurate testing for sthv1 * fix bug * add cache in rawframedecode * Fix SlowOnly Sthv1 checkpoint * update * update * update * update * remove lambda in AVADataset * update * update --- .gitignore | 5 + configs/_base_/models/ircsn_r152.py | 2 +- ...etrained_r50_8x8x1_cosine_10e_ava22_rgb.py | 4 +- ...pretrained_r50_8x8x1_cosine_10e_ava_rgb.py | 4 +- ...etics_pretrained_r50_4x16x1_20e_ava_rgb.py | 2 +- ...etics_pretrained_r50_4x16x1_20e_ava_rgb.py | 2 +- ...d_r50_4x16x1_20e_ava_rgb_custom_classes.py | 2 +- ...netics_pretrained_r50_8x8x1_20e_ava_rgb.py | 3 +- ...etrained_r50_8x8x1_cosine_10e_ava22_rgb.py | 4 +- ...etrained_r50_8x8x1_cosine_10e_ava22_rgb.py | 4 +- ...etrained_r50_8x8x1_cosine_10e_ava22_rgb.py | 4 +- ...etics_pretrained_r101_8x8x1_20e_ava_rgb.py | 2 +- ...etics_pretrained_r50_4x16x1_20e_ava_rgb.py | 2 +- ...d_r50_4x16x1_20e_ava_rgb_custom_classes.py | 2 +- ...etics_pretrained_r50_4x16x1_10e_ava_rgb.py | 2 +- ...netics_pretrained_r50_8x8x1_10e_ava_rgb.py | 2 +- ...ource_pretrained_r101_8x8x1_20e_ava_rgb.py | 2 +- ...ource_pretrained_r50_4x16x1_20e_ava_rgb.py | 2 +- ...trained_slowonly_r50_4x16x1_20e_ava_rgb.py | 2 +- ...trained_slowonly_r50_4x16x1_20e_ava_rgb.py | 2 +- ...trained_slowonly_r50_4x16x1_20e_ava_rgb.py | 2 +- configs/recognition/c3d/metafile.yml | 2 +- ...frozen_r152_32x2x1_180e_kinetics400_rgb.py | 3 +- ...frozen_r152_32x2x1_180e_kinetics400_rgb.py | 3 +- ...nfrozen_r50_32x2x1_180e_kinetics400_rgb.py | 3 +- ...nfrozen_r152_32x2x1_58e_kinetics400_rgb.py | 3 +- ...bnfrozen_r50_32x2x1_58e_kinetics400_rgb.py | 3 +- ...trained_r152_32x2x1_58e_kinetics400_rgb.py | 3 +- configs/recognition/csn/metafile.yml | 16 ++-- ...product_r50_32x2x1_100e_kinetics400_rgb.py | 3 +- .../i3d_r50_32x2x1_100e_kinetics400_rgb.py | 3 +- ...d_r50_dense_32x2x1_100e_kinetics400_rgb.py | 3 +- ...3d_r50_heavy_8x8x1_100e_kinetics400_rgb.py | 3 +- ...3d_r50_lazy_32x2x1_100e_kinetics400_rgb.py | 3 +- ...d_r50_video_32x2x1_100e_kinetics400_rgb.py | 3 +- ..._video_heavy_8x8x1_100e_kinetics400_rgb.py | 3 +- ...ideo_imgaug_32x2x1_100e_kinetics400_rgb.py | 3 +- configs/recognition/i3d/metafile.yml | 20 ++-- configs/recognition/omnisource/metafile.yml | 32 +++---- ...8x8x1_256e_minikinetics_googleimage_rgb.py | 1 + ...50_8x8x1_256e_minikinetics_insvideo_rgb.py | 1 + ...8x8x1_256e_minikinetics_kineticsraw_rgb.py | 1 + ..._8x8x1_256e_minikinetics_omnisource_rgb.py | 3 +- ...lowonly_r50_8x8x1_256e_minikinetics_rgb.py | 3 +- ...50_8x8x1_256e_minikinetics_webimage_rgb.py | 1 + ...1x1x8_100e_minikinetics_googleimage_rgb.py | 1 + ...50_1x1x8_100e_minikinetics_insvideo_rgb.py | 1 + ...1x1x8_100e_minikinetics_kineticsraw_rgb.py | 1 + ..._1x1x8_100e_minikinetics_omnisource_rgb.py | 3 +- .../tsn_r50_1x1x8_100e_minikinetics_rgb.py | 3 +- ...50_1x1x8_100e_minikinetics_webimage_rgb.py | 1 + configs/recognition/r2plus1d/metafile.yml | 8 +- ...2plus1d_r34_32x2x1_180e_kinetics400_rgb.py | 3 +- ...r2plus1d_r34_8x8x1_180e_kinetics400_rgb.py | 3 +- ...1d_r34_video_8x8x1_180e_kinetics400_rgb.py | 3 +- configs/recognition/slowfast/metafile.yml | 18 ++-- ...lowfast_r101_8x8x1_256e_kinetics400_rgb.py | 10 +- ...st_r101_r50_4x16x1_256e_kinetics400_rgb.py | 10 +- ...st_r152_r50_4x16x1_256e_kinetics400_rgb.py | 10 +- .../slowfast_r50_16x8x1_22e_sthv1_rgb.py | 3 +- ...lowfast_r50_4x16x1_256e_kinetics400_rgb.py | 3 +- ...slowfast_r50_8x8x1_256e_kinetics400_rgb.py | 2 +- ...t_r50_video_4x16x1_256e_kinetics400_rgb.py | 3 +- configs/recognition/slowonly/README.md | 2 +- configs/recognition/slowonly/README_zh-CN.md | 2 +- ...edcrop_256p_4x16x1_256e_kinetics400_rgb.py | 3 +- ...edcrop_320p_4x16x1_256e_kinetics400_rgb.py | 3 +- ...rop_340x256_4x16x1_256e_kinetics400_rgb.py | 3 +- configs/recognition/slowonly/metafile.yml | 48 +++++----- ...et_pretrained_r50_4x16x1_120e_gym99_rgb.py | 3 +- ...trained_r50_4x16x1_150e_kinetics400_rgb.py | 3 +- ...net_pretrained_r50_8x4x1_64e_hmdb51_rgb.py | 4 +- ...enet_pretrained_r50_8x4x1_64e_sthv1_rgb.py | 4 +- ...enet_pretrained_r50_8x4x1_64e_sthv2_rgb.py | 4 +- ...net_pretrained_r50_8x4x1_64e_ucf101_rgb.py | 4 +- ...etrained_r50_8x8x1_150e_kinetics400_rgb.py | 3 +- ...net_pretrained_r50_8x8x1_64e_jester_rgb.py | 2 +- ...0_pretrained_r50_4x16x1_120e_gym99_flow.py | 3 +- ...400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py | 4 +- ...400_pretrained_r50_8x4x1_40e_ucf101_rgb.py | 4 +- ...aussian_r50_4x16x1_150e_kinetics400_rgb.py | 2 +- ...gaussian_r50_8x8x1_150e_kinetics400_rgb.py | 2 +- ...owonly_r50_4x16x1_256e_kinetics400_flow.py | 3 +- ...lowonly_r50_4x16x1_256e_kinetics400_rgb.py | 3 +- ...lowonly_r50_8x8x1_256e_kinetics400_flow.py | 3 +- ...slowonly_r50_8x8x1_256e_kinetics400_rgb.py | 3 +- ...y_r50_video_4x16x1_256e_kinetics400_rgb.py | 3 +- ...ly_r50_video_8x8x1_256e_kinetics600_rgb.py | 3 +- ...ly_r50_video_8x8x1_256e_kinetics700_rgb.py | 3 +- configs/recognition/tanet/metafile.yml | 10 +- .../tanet/tanet_r50_1x1x16_50e_sthv1_rgb.py | 6 +- .../tanet/tanet_r50_1x1x8_50e_sthv1_rgb.py | 6 +- ...et_r50_dense_1x1x8_100e_kinetics400_rgb.py | 4 +- ...former_divST_8x32x1_15e_kinetics400_rgb.py | 3 +- ...rmer_jointST_8x32x1_15e_kinetics400_rgb.py | 3 +- ...er_spaceOnly_8x32x1_15e_kinetics400_rgb.py | 3 +- configs/recognition/tin/metafile.yml | 6 +- .../tin/tin_r50_1x1x8_40e_sthv1_rgb.py | 3 +- .../tin/tin_r50_1x1x8_40e_sthv2_rgb.py | 3 +- ..._finetune_r50_1x1x8_50e_kinetics400_rgb.py | 3 +- configs/recognition/tpn/metafile.yml | 2 +- ...ed_slowonly_r50_8x8x1_150e_kinetics_rgb.py | 1 + .../tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py | 2 +- configs/recognition/trn/metafile.yml | 8 +- .../trn/trn_r50_1x1x8_50e_sthv1_rgb.py | 2 +- .../trn/trn_r50_1x1x8_50e_sthv2_rgb.py | 2 +- configs/recognition/tsm/metafile.yml | 94 +++++++++---------- ...00_pretrained_r50_1x1x16_25e_hmdb51_rgb.py | 3 +- ...00_pretrained_r50_1x1x16_25e_ucf101_rgb.py | 3 +- ...400_pretrained_r50_1x1x8_25e_hmdb51_rgb.py | 3 +- ...400_pretrained_r50_1x1x8_25e_ucf101_rgb.py | 3 +- ...enetv2_dense_1x1x8_100e_kinetics400_rgb.py | 3 +- ..._video_dense_1x1x8_100e_kinetics400_rgb.py | 3 +- ...erence_dense_1x1x8_100e_kinetics400_rgb.py | 2 +- ...t_product_r50_1x1x8_50e_kinetics400_rgb.py | 3 +- ..._gaussian_r50_1x1x8_50e_kinetics400_rgb.py | 3 +- ..._gaussian_r50_1x1x8_50e_kinetics400_rgb.py | 3 +- .../tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py | 6 +- .../tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py | 3 +- .../tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py | 6 +- .../tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py | 6 +- .../tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py | 2 +- .../tsm/tsm_r50_1x1x8_50e_jester_rgb.py | 2 +- .../tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py | 3 +- .../tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py | 6 +- .../tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py | 6 +- .../tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py | 3 +- ...sm_r50_dense_1x1x8_100e_kinetics400_rgb.py | 5 +- .../tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py | 6 +- ...50_flip_randaugment_1x1x8_50e_sthv1_rgb.py | 6 +- ...gpu_normalize_1x1x8_50e_kinetics400_rgb.py | 3 +- .../tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py | 3 +- .../tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py | 4 +- ...r50_ptv_randaugment_1x1x8_50e_sthv1_rgb.py | 4 +- ...tsm_r50_randaugment_1x1x8_50e_sthv1_rgb.py | 6 +- .../tsm_r50_video_1x1x16_50e_diving48_rgb.py | 3 +- .../tsm_r50_video_1x1x8_50e_diving48_rgb.py | 3 +- ...tsm_r50_video_1x1x8_50e_kinetics400_rgb.py | 3 +- ...ense161_320p_1x1x3_100e_kinetics400_rgb.py | 3 +- ...1_32x4d_320p_1x1x3_100e_kinetics400_rgb.py | 3 +- ...r_video_320p_1x1x3_100e_kinetics400_rgb.py | 4 +- ...alecrop_256p_1x1x3_100e_kinetics400_rgb.py | 3 +- ...alecrop_320p_1x1x3_100e_kinetics400_rgb.py | 3 +- ...crop_340x256_1x1x3_100e_kinetics400_rgb.py | 3 +- ...zedcrop_256p_1x1x3_100e_kinetics400_rgb.py | 3 +- ...zedcrop_320p_1x1x3_100e_kinetics400_rgb.py | 3 +- ...crop_340x256_1x1x3_100e_kinetics400_rgb.py | 3 +- ...256p_1x1x25_10crop_100e_kinetics400_rgb.py | 3 +- ..._256p_1x1x25_3crop_100e_kinetics400_rgb.py | 3 +- ...320p_1x1x25_10crop_100e_kinetics400_rgb.py | 3 +- ..._320p_1x1x25_3crop_100e_kinetics400_rgb.py | 3 +- ...x256_1x1x25_10crop_100e_kinetics400_rgb.py | 3 +- ...0x256_1x1x25_3crop_100e_kinetics400_rgb.py | 3 +- .../hvu/tsn_r18_1x1x8_100e_hvu_action_rgb.py | 1 + .../tsn_r18_1x1x8_100e_hvu_attribute_rgb.py | 1 + .../hvu/tsn_r18_1x1x8_100e_hvu_concept_rgb.py | 1 + .../hvu/tsn_r18_1x1x8_100e_hvu_event_rgb.py | 1 + .../hvu/tsn_r18_1x1x8_100e_hvu_object_rgb.py | 1 + .../hvu/tsn_r18_1x1x8_100e_hvu_scene_rgb.py | 1 + configs/recognition/tsn/metafile.yml | 72 +++++++------- ...tsn_fp16_r50_1x1x3_100e_kinetics400_rgb.py | 3 +- .../tsn/tsn_r101_1x1x5_50e_mmit_rgb.py | 10 +- .../tsn/tsn_r50_1x1x16_50e_sthv1_rgb.py | 3 +- .../tsn/tsn_r50_1x1x16_50e_sthv2_rgb.py | 19 +++- .../tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py | 3 +- .../tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py | 3 +- .../tsn/tsn_r50_1x1x6_100e_mit_rgb.py | 3 +- .../tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb.py | 3 +- ...sn_r50_1x1x8_50e_hmdb51_kinetics400_rgb.py | 3 +- .../tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py | 1 + .../tsn/tsn_r50_1x1x8_50e_sthv1_rgb.py | 3 +- .../tsn/tsn_r50_1x1x8_50e_sthv2_rgb.py | 17 +++- ...tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py | 3 +- ...sn_r50_320p_1x1x3_110e_kinetics400_flow.py | 3 +- ...tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py | 3 +- ...sn_r50_320p_1x1x8_110e_kinetics400_flow.py | 3 +- ...0_320p_1x1x8_150e_activitynet_clip_flow.py | 3 +- ..._320p_1x1x8_150e_activitynet_video_flow.py | 3 +- ...r50_320p_1x1x8_50e_activitynet_clip_rgb.py | 3 +- ...50_320p_1x1x8_50e_activitynet_video_rgb.py | 3 +- ...n_r50_clip_feature_extraction_1x1x3_rgb.py | 1 + ...sn_r50_dense_1x1x5_100e_kinetics400_rgb.py | 5 +- ...sn_r50_dense_1x1x8_100e_kinetics400_rgb.py | 3 +- .../tsn_r50_video_1x1x16_100e_diving48_rgb.py | 3 +- .../tsn_r50_video_1x1x8_100e_diving48_rgb.py | 3 +- ...sn_r50_video_1x1x8_100e_kinetics400_rgb.py | 3 +- ...sn_r50_video_1x1x8_100e_kinetics600_rgb.py | 3 +- ...sn_r50_video_1x1x8_100e_kinetics700_rgb.py | 3 +- ...0_video_320p_1x1x3_100e_kinetics400_rgb.py | 3 +- ..._video_dense_1x1x8_100e_kinetics400_rgb.py | 3 +- ...video_imgaug_1x1x8_100e_kinetics400_rgb.py | 10 +- ..._video_mixup_1x1x8_100e_kinetics400_rgb.py | 3 +- ...0_64x1x1_100e_kinetics400_audio_feature.py | 2 +- configs/recognition_audio/resnet/metafile.yml | 2 +- ...8_64x1x1_100e_kinetics400_audio_feature.py | 2 +- .../tsn_r50_64x1x1_100e_kinetics400_audio.py | 2 +- ...wonly_r50_u48_240e_ntu120_xsub_keypoint.py | 2 +- .../slowonly_r50_u48_240e_ntu120_xsub_limb.py | 2 +- ...owonly_r50_u48_240e_ntu60_xsub_keypoint.py | 2 +- .../slowonly_r50_u48_240e_ntu60_xsub_limb.py | 2 +- demo/mmaction2_tutorial.ipynb | 2 +- demo/mmaction2_tutorial_zh-CN.ipynb | 2 +- docs/tutorials/1_config.md | 6 +- docs/tutorials/3_new_dataset.md | 2 +- docs_zh_CN/tutorials/1_config.md | 6 +- docs_zh_CN/tutorials/3_new_dataset.md | 2 +- mmaction/datasets/ava_dataset.py | 19 ++-- mmaction/models/heads/misc_head.py | 4 +- tools/data/jester/README.md | 2 +- tools/data/jester/README_zh-CN.md | 2 +- tools/data/sthv1/README.md | 2 +- tools/data/sthv1/README_zh-CN.md | 2 +- 212 files changed, 589 insertions(+), 407 deletions(-) diff --git a/.gitignore b/.gitignore index 4eba3d0d8e..587b296482 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,9 @@ work_dirs/ # avoid soft links created by MIM mmaction/configs/* mmaction/tools/* + +*.ipynb + +# unignore ipython notebook files in demo +!demo/*.ipynb mmaction/.mim diff --git a/configs/_base_/models/ircsn_r152.py b/configs/_base_/models/ircsn_r152.py index fcab416cbd..36e700c384 100644 --- a/configs/_base_/models/ircsn_r152.py +++ b/configs/_base_/models/ircsn_r152.py @@ -19,4 +19,4 @@ init_std=0.01), # model training and testing settings train_cfg=None, - test_cfg=dict(average_clips='prob')) + test_cfg=dict(average_clips='prob', max_testing_views=10)) diff --git a/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py b/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py index 7bd1ea779d..442165082f 100644 --- a/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py +++ b/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py @@ -70,7 +70,7 @@ exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.2.csv' exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.2.csv' -label_file = f'{anno_root}/ava_action_list_v2.2.pbtxt' +label_file = f'{anno_root}/ava_action_list_v2.2_for_activitynet_2019.pbtxt' proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.' 'recall_93.9.pkl') @@ -118,7 +118,7 @@ data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb.py b/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb.py index 50806ddacb..d199598628 100644 --- a/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb.py +++ b/configs/detection/acrn/slowfast_acrn_kinetics_pretrained_r50_8x8x1_cosine_10e_ava_rgb.py @@ -70,7 +70,7 @@ exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.1.csv' exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.1.csv' -label_file = f'{anno_root}/ava_action_list_v2.1.pbtxt' +label_file = f'{anno_root}/ava_action_list_v2.1_for_activitynet_2018.pbtxt' proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.' 'recall_93.9.pkl') @@ -118,7 +118,7 @@ data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py b/configs/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py index 6b5796425c..27b5637276 100644 --- a/configs/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py +++ b/configs/detection/ava/slowfast_context_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py @@ -119,7 +119,7 @@ data = dict( videos_per_gpu=9, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py b/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py index 22020db977..3f1fadc720 100644 --- a/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py +++ b/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py @@ -118,7 +118,7 @@ data = dict( videos_per_gpu=9, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py b/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py index 3b14fabd04..713136ca3e 100644 --- a/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py +++ b/configs/detection/ava/slowfast_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py @@ -124,7 +124,7 @@ data = dict( videos_per_gpu=9, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb.py b/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb.py index 9106fa8d29..89e83a0b8b 100644 --- a/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb.py +++ b/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb.py @@ -12,6 +12,7 @@ depth=50, pretrained=None, lateral=True, + fusion_kernel=7, conv1_kernel=(1, 7, 7), dilations=(1, 1, 1, 1), conv1_stride_t=1, @@ -118,7 +119,7 @@ data = dict( videos_per_gpu=5, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py b/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py index f7898d0b61..1b02c1a205 100644 --- a/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py +++ b/configs/detection/ava/slowfast_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py @@ -68,7 +68,7 @@ exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.2.csv' exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.2.csv' -label_file = f'{anno_root}/ava_action_list_v2.2.pbtxt' +label_file = f'{anno_root}/ava_action_list_v2.2_for_activitynet_2019.pbtxt' proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.' 'recall_93.9.pkl') @@ -116,7 +116,7 @@ data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py b/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py index b59e0008d8..5c167e9bee 100644 --- a/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py +++ b/configs/detection/ava/slowfast_temporal_max_focal_alpha3_gamma1_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py @@ -69,7 +69,7 @@ exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.2.csv' exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.2.csv' -label_file = f'{anno_root}/ava_action_list_v2.2.pbtxt' +label_file = f'{anno_root}/ava_action_list_v2.2_for_activitynet_2019.pbtxt' proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.' 'recall_93.9.pkl') @@ -117,7 +117,7 @@ data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py b/configs/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py index e77496a39a..4bea67b696 100644 --- a/configs/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py +++ b/configs/detection/ava/slowfast_temporal_max_kinetics_pretrained_r50_8x8x1_cosine_10e_ava22_rgb.py @@ -69,7 +69,7 @@ exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.2.csv' exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.2.csv' -label_file = f'{anno_root}/ava_action_list_v2.2.pbtxt' +label_file = f'{anno_root}/ava_action_list_v2.2_for_activitynet_2019.pbtxt' proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.' 'recall_93.9.pkl') @@ -117,7 +117,7 @@ data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb.py b/configs/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb.py index ce12865cd0..4967ea3679 100644 --- a/configs/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb.py +++ b/configs/detection/ava/slowonly_kinetics_pretrained_r101_8x8x1_20e_ava_rgb.py @@ -102,7 +102,7 @@ data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, # During testing, each video may have different shape val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), diff --git a/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py b/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py index 7ff769e7a8..d42c6b67c0 100644 --- a/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py +++ b/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb.py @@ -102,7 +102,7 @@ data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py b/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py index 1f81b01afa..0e6ff25105 100644 --- a/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py +++ b/configs/detection/ava/slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb_custom_classes.py @@ -109,7 +109,7 @@ data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb.py b/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb.py index b92faa6b17..c18273bbd4 100644 --- a/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb.py +++ b/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_4x16x1_10e_ava_rgb.py @@ -60,7 +60,7 @@ data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, # During testing, each video may have different shape val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), diff --git a/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb.py b/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb.py index 5bd3489bb8..bd05e864cc 100644 --- a/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb.py +++ b/configs/detection/ava/slowonly_nl_kinetics_pretrained_r50_8x8x1_10e_ava_rgb.py @@ -60,7 +60,7 @@ data = dict( videos_per_gpu=6, - workers_per_gpu=3, + workers_per_gpu=2, # During testing, each video may have different shape val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), diff --git a/configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py b/configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py index 0113a42751..3c5adc3e77 100644 --- a/configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py +++ b/configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py @@ -101,7 +101,7 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, # During testing, each video may have different shape val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), diff --git a/configs/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb.py b/configs/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb.py index 37af19e945..4aa7e72ef6 100644 --- a/configs/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb.py +++ b/configs/detection/ava/slowonly_omnisource_pretrained_r50_4x16x1_20e_ava_rgb.py @@ -102,7 +102,7 @@ data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, # During testing, each video may have different shape val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), diff --git a/configs/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py b/configs/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py index f9832276bd..09f5ba43c5 100644 --- a/configs/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py +++ b/configs/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py @@ -81,7 +81,7 @@ data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py b/configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py index ba1c5a3025..8e7434c2fe 100644 --- a/configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py +++ b/configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py @@ -81,7 +81,7 @@ data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py b/configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py index 862ec19498..f2d11ff3a5 100644 --- a/configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py +++ b/configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py @@ -91,7 +91,7 @@ data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/recognition/c3d/metafile.yml b/configs/recognition/c3d/metafile.yml index 781d39a4ca..4fa40ddd54 100644 --- a/configs/recognition/c3d/metafile.yml +++ b/configs/recognition/c3d/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: UCF101 Metrics: Top 1 Accuracy: 83.27 - top5 accuracy: 95.9 + Top 5 Accuracy: 95.9 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/c3d/c3d_sports1m_16x1x1_45e_ucf101_rgb/20201021_140429.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/c3d/c3d_sports1m_16x1x1_45e_ucf101_rgb/20201021_140429.log diff --git a/configs/recognition/csn/ipcsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py b/configs/recognition/csn/ipcsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py index 1a5b54c056..6c0792f2eb 100644 --- a/configs/recognition/csn/ipcsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py +++ b/configs/recognition/csn/ipcsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py @@ -59,7 +59,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/csn/ircsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py b/configs/recognition/csn/ircsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py index 8ce5fb5180..19873781b2 100644 --- a/configs/recognition/csn/ircsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py +++ b/configs/recognition/csn/ircsn_bnfrozen_r152_32x2x1_180e_kinetics400_rgb.py @@ -59,7 +59,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/csn/ircsn_bnfrozen_r50_32x2x1_180e_kinetics400_rgb.py b/configs/recognition/csn/ircsn_bnfrozen_r50_32x2x1_180e_kinetics400_rgb.py index ebb3d92856..cef9d5dea7 100644 --- a/configs/recognition/csn/ircsn_bnfrozen_r50_32x2x1_180e_kinetics400_rgb.py +++ b/configs/recognition/csn/ircsn_bnfrozen_r50_32x2x1_180e_kinetics400_rgb.py @@ -63,7 +63,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py b/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py index d25736ba76..54bc5b012f 100644 --- a/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py +++ b/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py @@ -63,7 +63,8 @@ ] data = dict( videos_per_gpu=3, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r50_32x2x1_58e_kinetics400_rgb.py b/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r50_32x2x1_58e_kinetics400_rgb.py index 9e39011374..fc44dc4251 100644 --- a/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r50_32x2x1_58e_kinetics400_rgb.py +++ b/configs/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r50_32x2x1_58e_kinetics400_rgb.py @@ -64,7 +64,8 @@ ] data = dict( videos_per_gpu=3, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py b/configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py index eba08ca20b..015526ccf6 100644 --- a/configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py +++ b/configs/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=3, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/csn/metafile.yml b/configs/recognition/csn/metafile.yml index 10edad13c2..6ad1d82831 100644 --- a/configs/recognition/csn/metafile.yml +++ b/configs/recognition/csn/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 80.14 - top5 accuracy: 94.93 + Top 5 Accuracy: 94.93 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb/20200728_031952.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/csn/ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb/20200728_031952.log @@ -43,7 +43,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 82.76 - top5 accuracy: 95.68 + Top 5 Accuracy: 95.68 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb/20200809_053132.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/csn/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb/20200809_053132.log @@ -64,7 +64,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 77.8 - top5 accuracy: 92.8 + Top 5 Accuracy: 92.8 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/csn/vmz/vmz_ipcsn_from_scratch_r152_32x2x1_180e_kinetics400_rgb_20210617-d565828d.pth - Config: configs/recognition/csn/ipcsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py @@ -83,7 +83,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 82.5 - top5 accuracy: 95.3 + Top 5 Accuracy: 95.3 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/csn/vmz/vmz_ipcsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb_20210617-c3be9793.pth inference_time(video/s): x @@ -103,7 +103,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 78.8 - top5 accuracy: 93.5 + Top 5 Accuracy: 93.5 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/csn/vmz/vmz_ipcsn_sports1m_pretrained_r152_32x2x1_58e_kinetics400_rgb_20210617-3367437a.pth inference_time(video/s): x @@ -123,7 +123,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.5 - top5 accuracy: 92.1 + Top 5 Accuracy: 92.1 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/csn/vmz/vmz_ircsn_from_scratch_r152_32x2x1_180e_kinetics400_rgb_20210617-5c933ae1.pth inference_time(video/s): x @@ -143,7 +143,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 79.0 - top5 accuracy: 94.2 + Top 5 Accuracy: 94.2 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/csn/vmz/vmz_ircsn_ig65m_pretrained_r50_32x2x1_58e_kinetics400_rgb_20210617-86d33018.pth inference_time(video/s): x @@ -163,7 +163,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 78.2 - top5 accuracy: 93.0 + Top 5 Accuracy: 93.0 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/csn/vmz/vmz_ircsn_sports1m_pretrained_r152_32x2x1_58e_kinetics400_rgb_20210617-b9b10241.pth inference_time(video/s): x diff --git a/configs/recognition/i3d/i3d_nl_dot_product_r50_32x2x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_nl_dot_product_r50_32x2x1_100e_kinetics400_rgb.py index 8ff1e2ff1e..466285006a 100644 --- a/configs/recognition/i3d/i3d_nl_dot_product_r50_32x2x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_nl_dot_product_r50_32x2x1_100e_kinetics400_rgb.py @@ -71,7 +71,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py index 894e8196be..aa0e523f14 100644 --- a/configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/i3d_r50_dense_32x2x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_r50_dense_32x2x1_100e_kinetics400_rgb.py index f1bdc4f4a1..17ea4303b9 100644 --- a/configs/recognition/i3d/i3d_r50_dense_32x2x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_r50_dense_32x2x1_100e_kinetics400_rgb.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/i3d_r50_heavy_8x8x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_r50_heavy_8x8x1_100e_kinetics400_rgb.py index b4688d4c13..f21feb2a01 100644 --- a/configs/recognition/i3d/i3d_r50_heavy_8x8x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_r50_heavy_8x8x1_100e_kinetics400_rgb.py @@ -66,7 +66,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/i3d_r50_lazy_32x2x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_r50_lazy_32x2x1_100e_kinetics400_rgb.py index eb285c89e9..de84b8feb5 100644 --- a/configs/recognition/i3d/i3d_r50_lazy_32x2x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_r50_lazy_32x2x1_100e_kinetics400_rgb.py @@ -62,7 +62,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb.py index 968d6c9e77..1477ac2a99 100644 --- a/configs/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py index 2ee3ff7b28..973f7fb88f 100644 --- a/configs/recognition/i3d/i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/i3d_r50_video_imgaug_32x2x1_100e_kinetics400_rgb.py b/configs/recognition/i3d/i3d_r50_video_imgaug_32x2x1_100e_kinetics400_rgb.py index 68b1bc9971..86baa0289d 100644 --- a/configs/recognition/i3d/i3d_r50_video_imgaug_32x2x1_100e_kinetics400_rgb.py +++ b/configs/recognition/i3d/i3d_r50_video_imgaug_32x2x1_100e_kinetics400_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/i3d/metafile.yml b/configs/recognition/i3d/metafile.yml index 02f5704cd7..404a5334ff 100644 --- a/configs/recognition/i3d/metafile.yml +++ b/configs/recognition/i3d/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.68 - top5 accuracy: 90.78 + Top 5 Accuracy: 90.78 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb/20200614_060456.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb/20200614_060456.log @@ -43,7 +43,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.27 - top5 accuracy: 90.92 + Top 5 Accuracy: 90.92 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_256p_32x2x1_100e_kinetics400_rgb/20200725_031555.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_256p_32x2x1_100e_kinetics400_rgb/20200725_031555.log @@ -66,7 +66,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.85 - top5 accuracy: 90.75 + Top 5 Accuracy: 90.75 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb/20200706_143014.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb/20200706_143014.log @@ -89,7 +89,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.77 - top5 accuracy: 90.57 + Top 5 Accuracy: 90.57 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_dense_32x2x1_100e_kinetics400_rgb/20200616_230011.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_dense_32x2x1_100e_kinetics400_rgb/20200616_230011.log @@ -112,7 +112,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.48 - top5 accuracy: 91.0 + Top 5 Accuracy: 91.0 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_dense_256p_32x2x1_100e_kinetics400_rgb/20200725_031604.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_dense_256p_32x2x1_100e_kinetics400_rgb/20200725_031604.log @@ -135,7 +135,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.32 - top5 accuracy: 90.72 + Top 5 Accuracy: 90.72 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_fast_32x2x1_100e_kinetics400_rgb/20200612_233836.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_fast_32x2x1_100e_kinetics400_rgb/20200612_233836.log @@ -158,7 +158,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.24 - top5 accuracy: 90.99 + Top 5 Accuracy: 90.99 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_fast_256p_32x2x1_100e_kinetics400_rgb/20200725_031457.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_r50_fast_256p_32x2x1_100e_kinetics400_rgb/20200725_031457.log @@ -181,7 +181,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.71 - top5 accuracy: 91.81 + Top 5 Accuracy: 91.81 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_nl_embedded_gaussian_r50_32x2x1_100e_kinetics400_rgb/20200813_034054.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_nl_embedded_gaussian_r50_32x2x1_100e_kinetics400_rgb/20200813_034054.log @@ -204,7 +204,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.37 - top5 accuracy: 91.26 + Top 5 Accuracy: 91.26 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_nl_gaussian_r50_32x2x1_100e_kinetics400_rgb/20200813_034909.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_nl_gaussian_r50_32x2x1_100e_kinetics400_rgb/20200813_034909.log @@ -227,7 +227,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.92 - top5 accuracy: 91.59 + Top 5 Accuracy: 91.59 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_nl_dot_product_r50_32x2x1_100e_kinetics400_rgb/20200814_044208.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/i3d/i3d_nl_dot_product_r50_32x2x1_100e_kinetics400_rgb/20200814_044208.log diff --git a/configs/recognition/omnisource/metafile.yml b/configs/recognition/omnisource/metafile.yml index f0ce133aa9..71fb7e6ed6 100644 --- a/configs/recognition/omnisource/metafile.yml +++ b/configs/recognition/omnisource/metafile.yml @@ -21,7 +21,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 77.4 - top5 accuracy: 93.6 + Top 5 Accuracy: 93.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/baseline/tsn_r50_1x1x8_100e_minikinetics_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/baseline/tsn_r50_1x1x8_100e_minikinetics_rgb_20201030.log @@ -45,7 +45,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 78.0 - top5 accuracy: 93.6 + Top 5 Accuracy: 93.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/googleimage/tsn_r50_1x1x8_100e_minikinetics_googleimage_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/googleimage/tsn_r50_1x1x8_100e_minikinetics_googleimage_rgb_20201030.log @@ -69,7 +69,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 78.6 - top5 accuracy: 93.6 + Top 5 Accuracy: 93.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/webimage/tsn_r50_1x1x8_100e_minikinetics_webimage_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/webimage/tsn_r50_1x1x8_100e_minikinetics_webimage_rgb_20201030.log @@ -93,7 +93,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 80.6 - top5 accuracy: 95.0 + Top 5 Accuracy: 95.0 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/insvideo/tsn_r50_1x1x8_100e_minikinetics_insvideo_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/insvideo/tsn_r50_1x1x8_100e_minikinetics_insvideo_rgb_20201030.log @@ -117,7 +117,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 78.6 - top5 accuracy: 93.2 + Top 5 Accuracy: 93.2 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/kineticsraw/tsn_r50_1x1x8_100e_minikinetics_kineticsraw_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/kineticsraw/tsn_r50_1x1x8_100e_minikinetics_kineticsraw_rgb_20201030.log @@ -141,7 +141,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 81.3 - top5 accuracy: 94.8 + Top 5 Accuracy: 94.8 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/omnisource/tsn_r50_1x1x8_100e_minikinetics_omnisource_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics_rgb/omnisource/tsn_r50_1x1x8_100e_minikinetics_omnisource_rgb_20201030.log @@ -165,7 +165,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 78.6 - top5 accuracy: 93.9 + Top 5 Accuracy: 93.9 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/baseline/slowonly_r50_8x8x1_256e_minikinetics_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/baseline/slowonly_r50_8x8x1_256e_minikinetics_rgb_20201030.log @@ -189,7 +189,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 80.8 - top5 accuracy: 95.0 + Top 5 Accuracy: 95.0 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/googleimage/slowonly_r50_8x8x1_256e_minikinetics_googleimage_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/googleimage/slowonly_r50_8x8x1_256e_minikinetics_googleimage_rgb_20201030.log @@ -213,7 +213,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 81.3 - top5 accuracy: 95.2 + Top 5 Accuracy: 95.2 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/webimage/slowonly_r50_8x8x1_256e_minikinetics_webimage_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/webimage/slowonly_r50_8x8x1_256e_minikinetics_webimage_rgb_20201030.log @@ -237,7 +237,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 82.4 - top5 accuracy: 95.6 + Top 5 Accuracy: 95.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/insvideo/slowonly_r50_8x8x1_256e_minikinetics_insvideo_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/insvideo/slowonly_r50_8x8x1_256e_minikinetics_insvideo_rgb_20201030.log @@ -261,7 +261,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 80.3 - top5 accuracy: 94.5 + Top 5 Accuracy: 94.5 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/kineticsraw/slowonly_r50_8x8x1_256e_minikinetics_kineticsraw_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/kineticsraw/slowonly_r50_8x8x1_256e_minikinetics_kineticsraw_rgb_20201030.log @@ -285,7 +285,7 @@ Models: - Dataset: MiniKinetics Metrics: Top 1 Accuracy: 82.9 - top5 accuracy: 95.8 + Top 5 Accuracy: 95.8 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/omnisource/slowonly_r50_8x8x1_256e_minikinetics_omnisource_rgb_20201030.json Training Log: https://download.openmmlab.com/mmaction/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics_rgb/omnisource/slowonly_r50_8x8x1_256e_minikinetics_omnisource_rgb_20201030.log @@ -307,7 +307,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.6 - top5 accuracy: 91.0 + Top 5 Accuracy: 91.0 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/tsn/omni/tsn_imagenet_pretrained_r50_omni_1x1x3_kinetics400_rgb_20200926-54192355.pth - Config: configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py @@ -327,7 +327,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 75.7 - top5 accuracy: 91.9 + Top 5 Accuracy: 91.9 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/tsn/omni/tsn_1G1B_pretrained_r50_omni_1x1x3_kinetics400_rgb_20200926-2863fed0.pth - Config: configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py @@ -347,7 +347,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.8 - top5 accuracy: 92.5 + Top 5 Accuracy: 92.5 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/slowonly/omni/slowonly_r50_omni_4x16x1_kinetics400_rgb_20200926-51b1f7ea.pth - Config: configs/recognition/slowonly/slowonly_r101_8x8x1_196e_kinetics400_rgb.py @@ -367,6 +367,6 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 80.4 - top5 accuracy: 94.4 + Top 5 Accuracy: 94.4 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/slowonly/omni/slowonly_r101_omni_8x8x1_kinetics400_rgb_20200926-b5dbb701.pth diff --git a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_googleimage_rgb.py b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_googleimage_rgb.py index 171965ca4e..0aee7f2c2c 100644 --- a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_googleimage_rgb.py +++ b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_googleimage_rgb.py @@ -88,6 +88,7 @@ data = dict( videos_per_gpu=12, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_insvideo_rgb.py b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_insvideo_rgb.py index 0f5f430b06..06195d431c 100644 --- a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_insvideo_rgb.py +++ b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_insvideo_rgb.py @@ -89,6 +89,7 @@ data = dict( videos_per_gpu=12, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_kineticsraw_rgb.py b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_kineticsraw_rgb.py index a59abf4653..35263134cd 100644 --- a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_kineticsraw_rgb.py +++ b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_kineticsraw_rgb.py @@ -89,6 +89,7 @@ data = dict( videos_per_gpu=12, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_omnisource_rgb.py b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_omnisource_rgb.py index 2f442d0f3d..4ef38005bc 100644 --- a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_omnisource_rgb.py +++ b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_omnisource_rgb.py @@ -119,7 +119,8 @@ data = dict( videos_per_gpu=12, - workers_per_gpu=1, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train_ratio=[2, 1, 1, 1], train=[ dict( diff --git a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_rgb.py b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_rgb.py index 0707487bcb..38f7be651b 100644 --- a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_rgb.py +++ b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_rgb.py @@ -69,7 +69,8 @@ data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_webimage_rgb.py b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_webimage_rgb.py index 68f679ee7c..4acf708c5b 100644 --- a/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_webimage_rgb.py +++ b/configs/recognition/omnisource/slowonly_r50_8x8x1_256e_minikinetics/slowonly_r50_8x8x1_256e_minikinetics_webimage_rgb.py @@ -88,6 +88,7 @@ data = dict( videos_per_gpu=12, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_googleimage_rgb.py b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_googleimage_rgb.py index d4c69bdcf3..447b7cb6c4 100644 --- a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_googleimage_rgb.py +++ b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_googleimage_rgb.py @@ -90,6 +90,7 @@ videos_per_gpu=12, omni_videos_per_gpu=[12, 64], workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_insvideo_rgb.py b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_insvideo_rgb.py index 30c65e4481..89d369403c 100644 --- a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_insvideo_rgb.py +++ b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_insvideo_rgb.py @@ -91,6 +91,7 @@ data = dict( videos_per_gpu=12, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_kineticsraw_rgb.py b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_kineticsraw_rgb.py index 0f0454d410..f86eaa5f69 100644 --- a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_kineticsraw_rgb.py +++ b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_kineticsraw_rgb.py @@ -91,6 +91,7 @@ data = dict( videos_per_gpu=12, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_omnisource_rgb.py b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_omnisource_rgb.py index 7832b953b8..e87c726b47 100644 --- a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_omnisource_rgb.py +++ b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_omnisource_rgb.py @@ -122,7 +122,8 @@ videos_per_gpu=12, omni_videos_per_gpu=[12, 64, 12, 12], train_ratio=[2, 1, 1, 1], - workers_per_gpu=1, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_rgb.py b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_rgb.py index 012a7ea51a..6ec9e1dc65 100644 --- a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_rgb.py +++ b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_rgb.py @@ -71,7 +71,8 @@ data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_webimage_rgb.py b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_webimage_rgb.py index 2ae15da4f7..070aa8571e 100644 --- a/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_webimage_rgb.py +++ b/configs/recognition/omnisource/tsn_r50_1x1x8_100e_minikinetics/tsn_r50_1x1x8_100e_minikinetics_webimage_rgb.py @@ -90,6 +90,7 @@ videos_per_gpu=12, omni_videos_per_gpu=[12, 64], workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=[ dict( type=dataset_type, diff --git a/configs/recognition/r2plus1d/metafile.yml b/configs/recognition/r2plus1d/metafile.yml index 9a95900717..a88409b3bb 100644 --- a/configs/recognition/r2plus1d/metafile.yml +++ b/configs/recognition/r2plus1d/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 67.3 - top5 accuracy: 87.65 + Top 5 Accuracy: 87.65 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/20200728_021421.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/20200728_021421.log @@ -43,7 +43,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 67.3 - top5 accuracy: 87.8 + Top 5 Accuracy: 87.8 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb/20200724_201360.log Training Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb/20200724_201360.log.json @@ -66,7 +66,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 68.68 - top5 accuracy: 88.36 + Top 5 Accuracy: 88.36 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_8x8_69.58_88.36.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_8x8x1_180e_kinetics400_rgb/r21d_8x8.log @@ -89,7 +89,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.6 - top5 accuracy: 91.59 + Top 5 Accuracy: 91.59 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_32x2x1_180e_kinetics400_rgb/r2plus1d_r34_32x2_74.6_91.6.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_32x2x1_180e_kinetics400_rgb/r21d_32x2.log diff --git a/configs/recognition/r2plus1d/r2plus1d_r34_32x2x1_180e_kinetics400_rgb.py b/configs/recognition/r2plus1d/r2plus1d_r34_32x2x1_180e_kinetics400_rgb.py index fc5514a9be..53b1763099 100644 --- a/configs/recognition/r2plus1d/r2plus1d_r34_32x2x1_180e_kinetics400_rgb.py +++ b/configs/recognition/r2plus1d/r2plus1d_r34_32x2x1_180e_kinetics400_rgb.py @@ -53,7 +53,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/r2plus1d/r2plus1d_r34_8x8x1_180e_kinetics400_rgb.py b/configs/recognition/r2plus1d/r2plus1d_r34_8x8x1_180e_kinetics400_rgb.py index a4c25d7f69..f06d5696a2 100644 --- a/configs/recognition/r2plus1d/r2plus1d_r34_8x8x1_180e_kinetics400_rgb.py +++ b/configs/recognition/r2plus1d/r2plus1d_r34_8x8x1_180e_kinetics400_rgb.py @@ -55,7 +55,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py b/configs/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py index c3744dcf46..49c85c2ae7 100644 --- a/configs/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py +++ b/configs/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py @@ -59,7 +59,8 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowfast/metafile.yml b/configs/recognition/slowfast/metafile.yml index a3841f2f0c..a55a9fdec0 100644 --- a/configs/recognition/slowfast/metafile.yml +++ b/configs/recognition/slowfast/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.75 - top5 accuracy: 91.73 + Top 5 Accuracy: 91.73 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_4x16x1_256e_kinetics400_rgb/20200731_151706.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_4x16x1_256e_kinetics400_rgb/20200731_151706.log @@ -43,7 +43,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.34 - top5 accuracy: 91.58 + Top 5 Accuracy: 91.58 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb/20200812_160237.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb/20200812_160237.log @@ -66,7 +66,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 75.64 - top5 accuracy: 92.3 + Top 5 Accuracy: 92.3 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb/20200704_232901.log @@ -89,7 +89,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 75.61 - top5 accuracy: 92.34 + Top 5 Accuracy: 92.34 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_256p_8x8x1_256e_kinetics400_rgb/20200731_151537.log @@ -112,7 +112,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.94 - top5 accuracy: 92.8 + Top 5 Accuracy: 92.8 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb/20200716_192653.log @@ -135,7 +135,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.69 - top5 accuracy: 93.07 + Top 5 Accuracy: 93.07 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_4x16x1_256e_kinetics400_rgb/20210118_133528.log @@ -158,7 +158,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 77.9 - top5 accuracy: 93.51 + Top 5 Accuracy: 93.51 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb/20210218_121513.log @@ -181,7 +181,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 77.13 - top5 accuracy: 93.2 + Top 5 Accuracy: 93.2 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r152_4x16x1_256e_kinetics400_rgb/20210122_131321.log @@ -204,7 +204,7 @@ Models: - Dataset: SthV1 Metrics: Top 1 Accuracy: 49.24 - top5 accuracy: 78.79 + Top 5 Accuracy: 78.79 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_16x8x1_22e_sthv1_rgb/20210606_225114.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowfast/slowfast_r50_16x8x1_22e_sthv1_rgb/20210606_225114.log diff --git a/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py index e484296692..31c52441e8 100644 --- a/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r101_8x8x1_256e_kinetics400_rgb.py @@ -33,9 +33,10 @@ in_channels=2304, # 2048+256 num_classes=400, spatial_type='avg', - dropout_ratio=0.5)) -train_cfg = None -test_cfg = dict(average_clips='prob') + dropout_ratio=0.5), + train_cfg=None, + test_cfg=dict(average_clips='prob', max_testing_views=10)) + dataset_type = 'RawframeDataset' data_root = 'data/kinetics400/rawframes_train' data_root_val = 'data/kinetics400/rawframes_val' @@ -88,7 +89,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py index dc2f1b898a..b8da9030e6 100644 --- a/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r101_r50_4x16x1_256e_kinetics400_rgb.py @@ -32,9 +32,10 @@ in_channels=2304, # 2048+256 num_classes=400, spatial_type='avg', - dropout_ratio=0.5)) -train_cfg = None -test_cfg = dict(average_clips='prob') + dropout_ratio=0.5), + train_cfg=None, + test_cfg=dict(average_clips='prob', max_testing_views=10)) + dataset_type = 'RawframeDataset' data_root = 'data/kinetics400/rawframes_train' data_root_val = 'data/kinetics400/rawframes_val' @@ -87,7 +88,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py index 06a9792ddd..0d9cd7ee10 100644 --- a/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r152_r50_4x16x1_256e_kinetics400_rgb.py @@ -32,9 +32,10 @@ in_channels=2304, # 2048+256 num_classes=400, spatial_type='avg', - dropout_ratio=0.5)) -train_cfg = None -test_cfg = dict(average_clips='prob', max_testing_views=8) + dropout_ratio=0.5), + train_cfg=None, + test_cfg=dict(average_clips='prob', max_testing_views=8)) + dataset_type = 'RawframeDataset' data_root = 'data/kinetics400/rawframes_train' data_root_val = 'data/kinetics400/rawframes_val' @@ -87,7 +88,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowfast/slowfast_r50_16x8x1_22e_sthv1_rgb.py b/configs/recognition/slowfast/slowfast_r50_16x8x1_22e_sthv1_rgb.py index d97cc8f613..f1e692c050 100644 --- a/configs/recognition/slowfast/slowfast_r50_16x8x1_22e_sthv1_rgb.py +++ b/configs/recognition/slowfast/slowfast_r50_16x8x1_22e_sthv1_rgb.py @@ -66,7 +66,8 @@ data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py index a0de3fe8ca..7e455a7ca6 100644 --- a/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r50_4x16x1_256e_kinetics400_rgb.py @@ -54,7 +54,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py index 49a30be628..ee68e80e05 100644 --- a/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py @@ -5,6 +5,6 @@ resample_rate=4, # tau speed_ratio=4, # alpha channel_ratio=8, # beta_inv - slow_pathway=dict(fusion_kernel=7))) + slow_pathway=dict(fusion_kernel=5))) work_dir = './work_dirs/slowfast_r50_3d_8x8x1_256e_kinetics400_rgb' diff --git a/configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py index 57108548d4..7335b3e7b4 100644 --- a/configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py @@ -63,7 +63,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/README.md b/configs/recognition/slowonly/README.md index 292bc42581..92672e5ec6 100644 --- a/configs/recognition/slowonly/README.md +++ b/configs/recognition/slowonly/README.md @@ -95,7 +95,7 @@ In data benchmark, we compare two different data preprocessing methods: (1) Resi |config | gpus | backbone | pretrain | top1 acc| top5 acc | gpu_mem(M) | ckpt | log| json| |:--|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:| -|[slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb](/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py)|8|ResNet50|ImageNet|46.63|77.19|7759|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb_20210630-807a9a9a.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/20210605_235410.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/20210605_235410.log.json)| +|[slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb](/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py)|8|ResNet50|ImageNet|46.63|77.19|7759|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb-34901d23.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb.json)| Notes: diff --git a/configs/recognition/slowonly/README_zh-CN.md b/configs/recognition/slowonly/README_zh-CN.md index 726a2cabe4..a8e87e4174 100644 --- a/configs/recognition/slowonly/README_zh-CN.md +++ b/configs/recognition/slowonly/README_zh-CN.md @@ -95,7 +95,7 @@ |配置文件 | GPU 数量 | 主干网络 | 预训练 | top1 准确率| top5 准确率 | GPU 显存占用 (M) | ckpt | log| json| |:--|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:| -|[slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb](/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py)|8|ResNet50|ImageNet|46.63|77.19|7759|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb_20210630-807a9a9a.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/20210605_235410.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/20210605_235410.log.json)| +|[slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb](/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py)|8|ResNet50|ImageNet|46.63|77.19|7759|[ckpt](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb-34901d23.pth)|[log](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb.log)|[json](https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb.json)| 注: diff --git a/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_256p_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_256p_4x16x1_256e_kinetics400_rgb.py index 2624e00be6..e79543a59a 100644 --- a/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_256p_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_256p_4x16x1_256e_kinetics400_rgb.py @@ -71,7 +71,8 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_320p_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_320p_4x16x1_256e_kinetics400_rgb.py index 7aea6956cb..b2d55cefae 100644 --- a/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_320p_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_320p_4x16x1_256e_kinetics400_rgb.py @@ -70,7 +70,8 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_340x256_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_340x256_4x16x1_256e_kinetics400_rgb.py index 638324ae81..d5c38635b2 100644 --- a/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_340x256_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/data_benchmark/slowonly_r50_randomresizedcrop_340x256_4x16x1_256e_kinetics400_rgb.py @@ -70,7 +70,8 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/metafile.yml b/configs/recognition/slowonly/metafile.yml index 34d52f70db..0e8d1e8196 100644 --- a/configs/recognition/slowonly/metafile.yml +++ b/configs/recognition/slowonly/metafile.yml @@ -19,7 +19,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.8 - top5 accuracy: 92.5 + Top 5 Accuracy: 92.5 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/slowonly/omni/slowonly_r50_omni_4x16x1_kinetics400_rgb_20200926-51b1f7ea.pth - Config: configs/recognition/slowonly/slowonly_r101_8x8x1_196e_kinetics400_rgb.py @@ -39,7 +39,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.5 - top5 accuracy: 92.7 + Top 5 Accuracy: 92.7 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/slowonly/omni/slowonly_r101_without_omni_8x8x1_kinetics400_rgb_20200926-0c730aef.pth - Config: configs/recognition/slowonly/slowonly_r101_8x8x1_196e_kinetics400_rgb.py @@ -59,7 +59,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 80.4 - top5 accuracy: 94.4 + Top 5 Accuracy: 94.4 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/slowonly/omni/slowonly_r101_omni_8x8x1_kinetics400_rgb_20200926-b5dbb701.pth - Config: configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py @@ -80,7 +80,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.76 - top5 accuracy: 90.51 + Top 5 Accuracy: 90.51 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_256p_4x16x1_256e_kinetics400_rgb/20200817_001411.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_256p_4x16x1_256e_kinetics400_rgb/20200817_001411.log @@ -103,7 +103,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.9 - top5 accuracy: 90.82 + Top 5 Accuracy: 90.82 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb_20201014.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb_20201014.log @@ -126,7 +126,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.42 - top5 accuracy: 91.49 + Top 5 Accuracy: 91.49 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_256p_8x8x1_256e_kinetics400_rgb/20200817_003320.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_256p_8x8x1_256e_kinetics400_rgb/20200817_003320.log @@ -149,7 +149,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.02 - top5 accuracy: 90.77 + Top 5 Accuracy: 90.77 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb/slowonly_r50_4x16_73.02_90.77.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb/so_4x16.log @@ -172,7 +172,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.93 - top5 accuracy: 91.92 + Top 5 Accuracy: 91.92 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb/slowonly_r50_8x8_74.93_91.92.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb/so_8x8.log @@ -195,7 +195,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.39 - top5 accuracy: 91.12 + Top 5 Accuracy: 91.12 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb_20200912.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb_20200912.log @@ -218,7 +218,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 75.55 - top5 accuracy: 92.04 + Top 5 Accuracy: 92.04 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb_20200912.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb_20200912.log @@ -241,7 +241,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.54 - top5 accuracy: 91.73 + Top 5 Accuracy: 91.73 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb/20210305_152630.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb/20210305_152630.log @@ -264,7 +264,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.07 - top5 accuracy: 92.42 + Top 5 Accuracy: 92.42 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb/20210308_212250.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb/20210308_212250.log @@ -287,7 +287,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 61.79 - top5 accuracy: 83.62 + Top 5 Accuracy: 83.62 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow/slowonly_r50_4x16x1_256e_kinetics400_flow_61.8_83.6.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow/slowonly_r50_4x16x1_256e_kinetics400_flow_61.8_83.6.log @@ -310,7 +310,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 65.76 - top5 accuracy: 86.25 + Top 5 Accuracy: 86.25 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow/slowonly_r50_8x8x1_196e_kinetics400_flow_65.8_86.3.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow/slowonly_r50_8x8x1_196e_kinetics400_flow_65.8_86.3.log @@ -333,7 +333,7 @@ Models: - Dataset: Kinetics-600 Metrics: Top 1 Accuracy: 77.5 - top5 accuracy: 93.7 + Top 5 Accuracy: 93.7 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb/slowonly_r50_video_8x8x1_256e_kinetics600_rgb_20201015.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb/slowonly_r50_video_8x8x1_256e_kinetics600_rgb_20201015.log @@ -356,7 +356,7 @@ Models: - Dataset: Kinetics-700 Metrics: Top 1 Accuracy: 65.0 - top5 accuracy: 86.1 + Top 5 Accuracy: 86.1 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb/slowonly_r50_video_8x8x1_256e_kinetics700_rgb_20201015.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb/slowonly_r50_video_8x8x1_256e_kinetics700_rgb_20201015.log @@ -446,7 +446,7 @@ Models: - Dataset: HMDB51 Metrics: Top 1 Accuracy: 37.52 - top5 accuracy: 71.5 + Top 5 Accuracy: 71.5 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb/20210605_185256.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb/20210605_185256.log @@ -468,7 +468,7 @@ Models: - Dataset: HMDB51 Metrics: Top 1 Accuracy: 65.95 - top5 accuracy: 91.05 + Top 5 Accuracy: 91.05 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb/20210606_010153.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb/20210606_010153.log @@ -490,7 +490,7 @@ Models: - Dataset: UCF101 Metrics: Top 1 Accuracy: 71.35 - top5 accuracy: 89.35 + Top 5 Accuracy: 89.35 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb/20210605_213503.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb/20210605_213503.log @@ -512,7 +512,7 @@ Models: - Dataset: UCF101 Metrics: Top 1 Accuracy: 92.78 - top5 accuracy: 99.42 + Top 5 Accuracy: 99.42 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb/20210606_010231.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb/20210606_010231.log @@ -534,8 +534,8 @@ Models: - Dataset: SthV1 Metrics: Top 1 Accuracy: 46.63 - top5 accuracy: 77.19 + Top 5 Accuracy: 77.19 Task: Action Recognition - Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/20210605_235410.log.json - Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/20210605_235410.log - Weights: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb_20210630-807a9a9a.pth + Training Json Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb.json + Training Log: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb.log + Weights: https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb/slowonly_r50_8x4x1_64e_sthv1_rgb-34901d23.pth diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_120e_gym99_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_120e_gym99_rgb.py index 8bfcd77d39..9ef7dfbe44 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_120e_gym99_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_120e_gym99_rgb.py @@ -55,7 +55,8 @@ ] data = dict( videos_per_gpu=24, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb.py index 5ed60a91eb..750d01b8b4 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb.py @@ -55,7 +55,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb.py index 1a95cc0155..0305527d3d 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb.py @@ -63,8 +63,8 @@ data = dict( videos_per_gpu=8, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=2), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py index 588c6b7803..89457ddf04 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py @@ -62,8 +62,8 @@ data = dict( videos_per_gpu=8, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=2), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv2_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv2_rgb.py index db92d92e67..65720cffbc 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv2_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv2_rgb.py @@ -62,8 +62,8 @@ data = dict( videos_per_gpu=8, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=2), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb.py index 3dd5808b34..48df87cc32 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb.py @@ -63,8 +63,8 @@ data = dict( videos_per_gpu=8, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=2), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py index 9a09622804..0e34eda9fd 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py @@ -55,7 +55,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb.py b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb.py index d2517e8391..6e4e7fbc33 100644 --- a/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb.py +++ b/configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb.py @@ -58,7 +58,7 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/recognition/slowonly/slowonly_k400_pretrained_r50_4x16x1_120e_gym99_flow.py b/configs/recognition/slowonly/slowonly_k400_pretrained_r50_4x16x1_120e_gym99_flow.py index 8ad75bace6..7ac7a0bedd 100644 --- a/configs/recognition/slowonly/slowonly_k400_pretrained_r50_4x16x1_120e_gym99_flow.py +++ b/configs/recognition/slowonly/slowonly_k400_pretrained_r50_4x16x1_120e_gym99_flow.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=24, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py b/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py index b59a759a9f..53832d7dc1 100644 --- a/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py +++ b/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py @@ -59,8 +59,8 @@ data = dict( videos_per_gpu=8, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=2), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py b/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py index da2341030c..c4e5be479d 100644 --- a/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py +++ b/configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py @@ -62,8 +62,8 @@ data = dict( videos_per_gpu=8, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=2), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb.py b/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb.py index e5e33a126d..85d8b7f237 100644 --- a/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb.py @@ -68,7 +68,7 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb.py b/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb.py index 8331fdac8f..4f71e890c5 100644 --- a/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb.py @@ -68,7 +68,7 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow.py b/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow.py index 04c0a25ca8..02a3faf696 100644 --- a/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow.py +++ b/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=24, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py index 2fbab6150d..a68c8efa88 100644 --- a/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow.py b/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow.py index 5d55e3386e..2cba67d9e1 100644 --- a/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow.py +++ b/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb.py b/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb.py index 5764678c84..eec3694e7a 100644 --- a/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py b/configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py index 3a89d35785..202fa4e330 100644 --- a/configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py +++ b/configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=24, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py b/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py index 311216fd9c..4b2b987b68 100644 --- a/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py +++ b/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py b/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py index 79edfb6db2..4cbc901850 100644 --- a/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py +++ b/configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tanet/metafile.yml b/configs/recognition/tanet/metafile.yml index 76b5867a14..19a5d47839 100644 --- a/configs/recognition/tanet/metafile.yml +++ b/configs/recognition/tanet/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 76.28 - top5 accuracy: 92.6 + Top 5 Accuracy: 92.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tanet/tanet_r50_dense_1x1x8_100e_kinetics400_rgb/tanet_r50_dense_1x1x8_100e_kinetics400_rgb_20210219.json Training Log: https://download.openmmlab.com/mmaction/recognition/tanet/tanet_r50_dense_1x1x8_100e_kinetics400_rgb/tanet_r50_dense_1x1x8_100e_kinetics400_rgb_20210219.log @@ -44,8 +44,8 @@ Models: Metrics: Top 1 Accuracy: 49.69 Top 1 Accuracy (efficient): 47.45 - top5 accuracy: 77.62 - top5 accuracy (efficient): 76.0 + Top 5 Accuracy: 77.62 + Top 5 Accuracy (efficient): 76.0 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tanet/tanet_r50_1x1x8_50e_sthv1_rgb/20210606_205006.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tanet/tanet_r50_1x1x8_50e_sthv1_rgb/20210606_205006.log @@ -69,8 +69,8 @@ Models: Metrics: Top 1 Accuracy: 50.41 Top 1 Accuracy (efficient): 47.73 - top5 accuracy: 78.47 - top5 accuracy (efficient): 77.31 + Top 5 Accuracy: 78.47 + Top 5 Accuracy (efficient): 77.31 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tanet/tanet_r50_1x1x16_50e_sthv1_rgb/20210607_155335.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tanet/tanet_r50_1x1x16_50e_sthv1_rgb/20210607_155335.log diff --git a/configs/recognition/tanet/tanet_r50_1x1x16_50e_sthv1_rgb.py b/configs/recognition/tanet/tanet_r50_1x1x16_50e_sthv1_rgb.py index d6ff915721..741bd4db65 100644 --- a/configs/recognition/tanet/tanet_r50_1x1x16_50e_sthv1_rgb.py +++ b/configs/recognition/tanet/tanet_r50_1x1x16_50e_sthv1_rgb.py @@ -59,10 +59,11 @@ clip_len=1, frame_interval=1, num_clips=16, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -70,7 +71,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tanet/tanet_r50_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tanet/tanet_r50_1x1x8_50e_sthv1_rgb.py index 987336e081..2aa497dca9 100644 --- a/configs/recognition/tanet/tanet_r50_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tanet/tanet_r50_1x1x8_50e_sthv1_rgb.py @@ -57,10 +57,11 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -68,7 +69,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tanet/tanet_r50_dense_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tanet/tanet_r50_dense_1x1x8_100e_kinetics400_rgb.py index a651ecf7f7..3ac78366c2 100644 --- a/configs/recognition/tanet/tanet_r50_dense_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tanet/tanet_r50_dense_1x1x8_100e_kinetics400_rgb.py @@ -63,8 +63,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=2), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/timesformer/timesformer_divST_8x32x1_15e_kinetics400_rgb.py b/configs/recognition/timesformer/timesformer_divST_8x32x1_15e_kinetics400_rgb.py index f4bd1614d1..8772ad953b 100644 --- a/configs/recognition/timesformer/timesformer_divST_8x32x1_15e_kinetics400_rgb.py +++ b/configs/recognition/timesformer/timesformer_divST_8x32x1_15e_kinetics400_rgb.py @@ -75,7 +75,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/timesformer/timesformer_jointST_8x32x1_15e_kinetics400_rgb.py b/configs/recognition/timesformer/timesformer_jointST_8x32x1_15e_kinetics400_rgb.py index 66eec25eca..4f4fdf7cbc 100644 --- a/configs/recognition/timesformer/timesformer_jointST_8x32x1_15e_kinetics400_rgb.py +++ b/configs/recognition/timesformer/timesformer_jointST_8x32x1_15e_kinetics400_rgb.py @@ -75,7 +75,8 @@ ] data = dict( videos_per_gpu=7, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/timesformer/timesformer_spaceOnly_8x32x1_15e_kinetics400_rgb.py b/configs/recognition/timesformer/timesformer_spaceOnly_8x32x1_15e_kinetics400_rgb.py index 6d859e8b37..a6207d9542 100644 --- a/configs/recognition/timesformer/timesformer_spaceOnly_8x32x1_15e_kinetics400_rgb.py +++ b/configs/recognition/timesformer/timesformer_spaceOnly_8x32x1_15e_kinetics400_rgb.py @@ -75,7 +75,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tin/metafile.yml b/configs/recognition/tin/metafile.yml index 539b8899c1..2fe338d80d 100644 --- a/configs/recognition/tin/metafile.yml +++ b/configs/recognition/tin/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: SthV1 Metrics: Top 1 Accuracy: 44.25 - top5 accuracy: 73.94 + Top 5 Accuracy: 73.94 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tin/tin_r50_1x1x8_40e_sthv1_rgb/20200729_034132.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tin/tin_r50_1x1x8_40e_sthv1_rgb/20200729_034132.log @@ -43,7 +43,7 @@ Models: - Dataset: SthV2 Metrics: Top 1 Accuracy: 56.7 - top5 accuracy: 83.62 + Top 5 Accuracy: 83.62 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tin/tin_r50_1x1x8_40e_sthv2_rgb/20200912_225451.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tin/tin_r50_1x1x8_40e_sthv2_rgb/20200912_225451.log @@ -66,7 +66,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.89 - top5 accuracy: 89.89 + Top 5 Accuracy: 89.89 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb/20200809_142447.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb/20200809_142447.log diff --git a/configs/recognition/tin/tin_r50_1x1x8_40e_sthv1_rgb.py b/configs/recognition/tin/tin_r50_1x1x8_40e_sthv1_rgb.py index f6bcf82807..3ba652479b 100644 --- a/configs/recognition/tin/tin_r50_1x1x8_40e_sthv1_rgb.py +++ b/configs/recognition/tin/tin_r50_1x1x8_40e_sthv1_rgb.py @@ -60,7 +60,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tin/tin_r50_1x1x8_40e_sthv2_rgb.py b/configs/recognition/tin/tin_r50_1x1x8_40e_sthv2_rgb.py index 8d1a93d561..35bbd26b00 100644 --- a/configs/recognition/tin/tin_r50_1x1x8_40e_sthv2_rgb.py +++ b/configs/recognition/tin/tin_r50_1x1x8_40e_sthv2_rgb.py @@ -60,7 +60,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py b/configs/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py index 9277fe9ac8..81f03a7344 100644 --- a/configs/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py +++ b/configs/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py @@ -64,7 +64,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tpn/metafile.yml b/configs/recognition/tpn/metafile.yml index bb70db3cd3..ecf75fe278 100644 --- a/configs/recognition/tpn/metafile.yml +++ b/configs/recognition/tpn/metafile.yml @@ -66,7 +66,7 @@ Models: - Dataset: SthV1 Metrics: Top 1 Accuracy: 50.8 - top5 accuracy: 79.05 + Top 5 Accuracy: 79.05 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb/20210311_162636.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb/20210311_162636.log diff --git a/configs/recognition/tpn/tpn_imagenet_pretrained_slowonly_r50_8x8x1_150e_kinetics_rgb.py b/configs/recognition/tpn/tpn_imagenet_pretrained_slowonly_r50_8x8x1_150e_kinetics_rgb.py index a84a0b1895..3b1738fdcf 100644 --- a/configs/recognition/tpn/tpn_imagenet_pretrained_slowonly_r50_8x8x1_150e_kinetics_rgb.py +++ b/configs/recognition/tpn/tpn_imagenet_pretrained_slowonly_r50_8x8x1_150e_kinetics_rgb.py @@ -57,6 +57,7 @@ data = dict( videos_per_gpu=8, workers_per_gpu=8, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py b/configs/recognition/tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py index 8783f550fb..0258f4a3d4 100644 --- a/configs/recognition/tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py +++ b/configs/recognition/tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py @@ -57,7 +57,7 @@ data = dict( videos_per_gpu=8, workers_per_gpu=8, - test_dataloader=dict(videos_per_gpu=12), + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/trn/metafile.yml b/configs/recognition/trn/metafile.yml index 1f999f63cf..aa47950de6 100644 --- a/configs/recognition/trn/metafile.yml +++ b/configs/recognition/trn/metafile.yml @@ -20,8 +20,8 @@ Models: Metrics: Top 1 Accuracy: 33.88 Top 1 Accuracy (efficient): 31.62 - top5 accuracy: 62.12 - top5 accuracy (efficient): 60.01 + Top 5 Accuracy: 62.12 + Top 5 Accuracy (efficient): 60.01 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/trn/trn_r50_1x1x8_50e_sthv1_rgb/20210326_103948.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/trn/trn_r50_1x1x8_50e_sthv1_rgb/20210326_103948.log @@ -44,8 +44,8 @@ Models: Metrics: Top 1 Accuracy: 47.96 Top 1 Accuracy (efficient): 45.14 - top5 accuracy: 75.97 - top5 accuracy (efficient): 73.21 + Top 5 Accuracy: 75.97 + Top 5 Accuracy (efficient): 73.21 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/trn/trn_r50_1x1x8_50e_sthv2_rgb/20210326_103951.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/trn/trn_r50_1x1x8_50e_sthv2_rgb/20210326_103951.log diff --git a/configs/recognition/trn/trn_r50_1x1x8_50e_sthv1_rgb.py b/configs/recognition/trn/trn_r50_1x1x8_50e_sthv1_rgb.py index 0578748296..dac55c03b7 100644 --- a/configs/recognition/trn/trn_r50_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/trn/trn_r50_1x1x8_50e_sthv1_rgb.py @@ -68,7 +68,7 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/configs/recognition/trn/trn_r50_1x1x8_50e_sthv2_rgb.py b/configs/recognition/trn/trn_r50_1x1x8_50e_sthv2_rgb.py index a3e2615db2..09e8e2f6d5 100644 --- a/configs/recognition/trn/trn_r50_1x1x8_50e_sthv2_rgb.py +++ b/configs/recognition/trn/trn_r50_1x1x8_50e_sthv2_rgb.py @@ -68,7 +68,7 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/configs/recognition/tsm/metafile.yml b/configs/recognition/tsm/metafile.yml index 1f0bc3c81b..1a0ccf4072 100644 --- a/configs/recognition/tsm/metafile.yml +++ b/configs/recognition/tsm/metafile.yml @@ -20,7 +20,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.24 - top5 accuracy: 89.56 + Top 5 Accuracy: 89.56 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb/20200607_211800.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb/20200607_211800.log @@ -43,7 +43,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.59 - top5 accuracy: 89.52 + Top 5 Accuracy: 89.52 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_256p_1x1x8_50e_kinetics400_rgb/20200725_031623.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_256p_1x1x8_50e_kinetics400_rgb/20200725_031623.log @@ -66,7 +66,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.73 - top5 accuracy: 89.81 + Top 5 Accuracy: 89.81 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb/20210616_021451.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb/20210616_021451.log @@ -89,7 +89,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 71.9 - top5 accuracy: 90.03 + Top 5 Accuracy: 90.03 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb/20210617_103543.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb/20210617_103543.log @@ -112,7 +112,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.48 - top5 accuracy: 89.4 + Top 5 Accuracy: 89.4 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb_20210219.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb_20210219.log @@ -135,7 +135,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.25 - top5 accuracy: 89.66 + Top 5 Accuracy: 89.66 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_100e_kinetics400_rgb/tsm_r50_video_2d_1x1x8_50e_kinetics400_rgb.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_100e_kinetics400_rgb/tsm_r50_video_2d_1x1x8_50e_kinetics400_rgb.log @@ -158,7 +158,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.46 - top5 accuracy: 90.84 + Top 5 Accuracy: 90.84 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_1x1x8_50e_kinetics400_rgb/20210617_103245.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_1x1x8_50e_kinetics400_rgb/20210617_103245.log @@ -181,7 +181,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 74.55 - top5 accuracy: 91.74 + Top 5 Accuracy: 91.74 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb/20210613_034931.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb/20210613_034931.log @@ -204,7 +204,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.09 - top5 accuracy: 90.37 + Top 5 Accuracy: 90.37 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb/20201011_205356.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb/20201011_205356.log @@ -227,7 +227,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 71.89 - top5 accuracy: 90.73 + Top 5 Accuracy: 90.73 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_256p_1x1x16_50e_kinetics400_rgb/20201010_224825.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_256p_1x1x16_50e_kinetics400_rgb/20201010_224825.log @@ -250,7 +250,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.8 - top5 accuracy: 90.75 + Top 5 Accuracy: 90.75 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb/20210621_115844.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb/20210621_115844.log @@ -273,7 +273,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.03 - top5 accuracy: 90.25 + Top 5 Accuracy: 90.25 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_nl_embedded_gaussian_r50_1x1x8_50e_kinetics400_rgb/20200724_120023.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_nl_embedded_gaussian_r50_1x1x8_50e_kinetics400_rgb/20200724_120023.log @@ -296,7 +296,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.7 - top5 accuracy: 89.9 + Top 5 Accuracy: 89.9 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb/20200815_210253.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb/20200815_210253.log @@ -319,7 +319,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 71.6 - top5 accuracy: 90.34 + Top 5 Accuracy: 90.34 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_nl_dot_product_r50_1x1x8_50e_kinetics400_rgb/20200723_220442.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_nl_dot_product_r50_1x1x8_50e_kinetics400_rgb/20200723_220442.log @@ -342,7 +342,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 68.46 - top5 accuracy: 88.64 + Top 5 Accuracy: 88.64 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_mobilenetv2_dense_1x1x8_100e_kinetics400_rgb/20210129_024936.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_mobilenetv2_dense_1x1x8_100e_kinetics400_rgb/20210129_024936.log @@ -364,7 +364,7 @@ Models: - Dataset: Diving48 Metrics: Top 1 Accuracy: 75.99 - top5 accuracy: 97.16 + Top 5 Accuracy: 97.16 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_50e_diving48_rgb/20210426_012424.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_50e_diving48_rgb/20210426_012424.log @@ -386,7 +386,7 @@ Models: - Dataset: Diving48 Metrics: Top 1 Accuracy: 81.62 - top5 accuracy: 97.66 + Top 5 Accuracy: 97.66 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x16_50e_diving48_rgb/20210426_012823.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x16_50e_diving48_rgb/20210426_012823.log @@ -410,8 +410,8 @@ Models: Metrics: Top 1 Accuracy: 47.7 Top 1 Accuracy (efficient): 45.58 - top5 accuracy: 76.12 - top5 accuracy (efficient): 75.02 + Top 5 Accuracy: 76.12 + Top 5 Accuracy (efficient): 75.02 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb/20210203_150227.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb/20210203_150227.log @@ -437,8 +437,8 @@ Models: Metrics: Top 1 Accuracy: 48.51 Top 1 Accuracy (efficient): 47.1 - top5 accuracy: 77.56 - top5 accuracy (efficient): 76.02 + Top 5 Accuracy: 77.56 + Top 5 Accuracy (efficient): 76.02 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb/20210203_145829.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb/20210203_145829.log @@ -464,8 +464,8 @@ Models: Metrics: Top 1 Accuracy: 48.9 Top 1 Accuracy (efficient): 47.16 - top5 accuracy: 77.92 - top5 accuracy (efficient): 76.07 + Top 5 Accuracy: 77.92 + Top 5 Accuracy (efficient): 76.07 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb.log @@ -491,8 +491,8 @@ Models: Metrics: Top 1 Accuracy: 50.31 Top 1 Accuracy (efficient): 47.85 - top5 accuracy: 78.18 - top5 accuracy (efficient): 76.78 + Top 5 Accuracy: 78.18 + Top 5 Accuracy (efficient): 76.78 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.log @@ -518,8 +518,8 @@ Models: Metrics: Top 1 Accuracy: 49.28 Top 1 Accuracy (efficient): 47.62 - top5 accuracy: 77.82 - top5 accuracy (efficient): 76.63 + Top 5 Accuracy: 77.82 + Top 5 Accuracy (efficient): 76.63 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb/20201010_221240.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb/20201010_221240.log @@ -545,8 +545,8 @@ Models: Metrics: Top 1 Accuracy: 48.43 Top 1 Accuracy (efficient): 45.72 - top5 accuracy: 76.72 - top5 accuracy (efficient): 74.67 + Top 5 Accuracy: 76.72 + Top 5 Accuracy (efficient): 74.67 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r101_1x1x8_50e_sthv1_rgb/20201010_224055.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r101_1x1x8_50e_sthv1_rgb/20201010_224055.log @@ -572,8 +572,8 @@ Models: Metrics: Top 1 Accuracy: 61.12 Top 1 Accuracy (efficient): 57.86 - top5 accuracy: 86.26 - top5 accuracy (efficient): 84.67 + Top 5 Accuracy: 86.26 + Top 5 Accuracy (efficient): 84.67 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb/20200912_140737.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb/20200912_140737.log @@ -599,8 +599,8 @@ Models: Metrics: Top 1 Accuracy: 63.84 Top 1 Accuracy (efficient): 60.79 - top5 accuracy: 88.3 - top5 accuracy (efficient): 86.6 + Top 5 Accuracy: 88.3 + Top 5 Accuracy (efficient): 86.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb/20210401_143656.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb/20210401_143656.log @@ -626,8 +626,8 @@ Models: Metrics: Top 1 Accuracy: 62.04 Top 1 Accuracy (efficient): 59.93 - top5 accuracy: 87.35 - top5 accuracy (efficient): 86.1 + Top 5 Accuracy: 87.35 + Top 5 Accuracy (efficient): 86.1 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb/20201010_224215.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb/20201010_224215.log @@ -653,8 +653,8 @@ Models: Metrics: Top 1 Accuracy: 63.19 Top 1 Accuracy (efficient): 61.06 - top5 accuracy: 87.93 - top5 accuracy (efficient): 86.66 + Top 5 Accuracy: 87.93 + Top 5 Accuracy (efficient): 86.66 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb/20210331_134458.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb/20210331_134458.log @@ -680,8 +680,8 @@ Models: Metrics: Top 1 Accuracy: 61.51 Top 1 Accuracy (efficient): 58.59 - top5 accuracy: 86.9 - top5 accuracy (efficient): 85.07 + Top 5 Accuracy: 86.9 + Top 5 Accuracy (efficient): 85.07 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb/20201010_224100.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb/20201010_224100.log @@ -707,8 +707,8 @@ Models: Metrics: Top 1 Accuracy: 48.49 Top 1 Accuracy (efficient): 46.35 - top5 accuracy: 76.88 - top5 accuracy (efficient): 75.07 + Top 5 Accuracy: 76.88 + Top 5 Accuracy (efficient): 75.07 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.log @@ -734,8 +734,8 @@ Models: Metrics: Top 1 Accuracy: 47.46 Top 1 Accuracy (efficient): 45.92 - top5 accuracy: 76.71 - top5 accuracy (efficient): 75.23 + Top 5 Accuracy: 76.71 + Top 5 Accuracy (efficient): 75.23 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.log @@ -782,7 +782,7 @@ Models: - Dataset: HMDB51 Metrics: Top 1 Accuracy: 72.68 - top5 accuracy: 92.03 + Top 5 Accuracy: 92.03 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_hmdb51_rgb/20210605_182554.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_hmdb51_rgb/20210605_182554.log @@ -805,7 +805,7 @@ Models: - Dataset: HMDB51 Metrics: Top 1 Accuracy: 74.77 - top5 accuracy: 93.86 + Top 5 Accuracy: 93.86 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_hmdb51_rgb/20210605_182505.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_hmdb51_rgb/20210605_182505.log @@ -828,7 +828,7 @@ Models: - Dataset: UCF101 Metrics: Top 1 Accuracy: 94.5 - top5 accuracy: 99.58 + Top 5 Accuracy: 99.58 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_ucf101_rgb/20210605_182720.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_ucf101_rgb/20210605_182720.log @@ -851,7 +851,7 @@ Models: - Dataset: UCF101 Metrics: Top 1 Accuracy: 94.58 - top5 accuracy: 99.37 + Top 5 Accuracy: 99.37 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_ucf101_rgb/20210605_182720.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_ucf101_rgb/20210605_182720.log diff --git a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_hmdb51_rgb.py b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_hmdb51_rgb.py index c73cc685ed..9a6535b3ed 100644 --- a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_hmdb51_rgb.py +++ b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_hmdb51_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_ucf101_rgb.py b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_ucf101_rgb.py index 8fa456dd9d..92ef9bfe4c 100644 --- a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_ucf101_rgb.py +++ b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x16_25e_ucf101_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_hmdb51_rgb.py b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_hmdb51_rgb.py index bdc430804b..5169eda3a9 100644 --- a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_hmdb51_rgb.py +++ b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_hmdb51_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_ucf101_rgb.py b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_ucf101_rgb.py index 5b6c07d478..84317727a4 100644 --- a/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_ucf101_rgb.py +++ b/configs/recognition/tsm/tsm_k400_pretrained_r50_1x1x8_25e_ucf101_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_mobilenetv2_dense_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_mobilenetv2_dense_1x1x8_100e_kinetics400_rgb.py index 57628cfc23..b6df2b32d1 100644 --- a/configs/recognition/tsm/tsm_mobilenetv2_dense_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_mobilenetv2_dense_1x1x8_100e_kinetics400_rgb.py @@ -63,7 +63,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_mobilenetv2_video_dense_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_mobilenetv2_video_dense_1x1x8_100e_kinetics400_rgb.py index b62990f5b9..9442e1d700 100644 --- a/configs/recognition/tsm/tsm_mobilenetv2_video_dense_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_mobilenetv2_video_dense_1x1x8_100e_kinetics400_rgb.py @@ -66,7 +66,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_mobilenetv2_video_inference_dense_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_mobilenetv2_video_inference_dense_1x1x8_100e_kinetics400_rgb.py index 71b9ed8f52..15a3edd5f4 100644 --- a/configs/recognition/tsm/tsm_mobilenetv2_video_inference_dense_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_mobilenetv2_video_inference_dense_1x1x8_100e_kinetics400_rgb.py @@ -25,7 +25,7 @@ data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, test=dict( type=dataset_type, ann_file=ann_file_test, diff --git a/configs/recognition/tsm/tsm_nl_dot_product_r50_1x1x8_50e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_nl_dot_product_r50_1x1x8_50e_kinetics400_rgb.py index f04cba0e46..884a2d663c 100644 --- a/configs/recognition/tsm/tsm_nl_dot_product_r50_1x1x8_50e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_nl_dot_product_r50_1x1x8_50e_kinetics400_rgb.py @@ -72,7 +72,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_nl_embedded_gaussian_r50_1x1x8_50e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_nl_embedded_gaussian_r50_1x1x8_50e_kinetics400_rgb.py index f7d7360c0f..738043ac04 100644 --- a/configs/recognition/tsm/tsm_nl_embedded_gaussian_r50_1x1x8_50e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_nl_embedded_gaussian_r50_1x1x8_50e_kinetics400_rgb.py @@ -72,7 +72,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py index 5770e50c92..9516e93b05 100644 --- a/configs/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py @@ -72,7 +72,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py b/configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py index a4c5ce7d41..1926a975ba 100644 --- a/configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py +++ b/configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py @@ -50,10 +50,11 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -61,7 +62,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py index cf067d6728..7dcf579f21 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py @@ -65,7 +65,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py index 590a8d1ac9..8ca1b6b0c4 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py @@ -55,10 +55,11 @@ clip_len=1, frame_interval=1, num_clips=16, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -66,7 +67,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py index 04bd982d77..f930f1c244 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py @@ -55,10 +55,11 @@ clip_len=1, frame_interval=1, num_clips=16, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -66,7 +67,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py index bff76cf13d..88b28924f6 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py @@ -1,4 +1,4 @@ -_base_ = ['./tsm_r50_1x1x8_50e_kinetics400_rgb'] +_base_ = ['./tsm_r50_1x1x8_50e_kinetics400_rgb.py'] optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2)) lr_config = dict(policy='step', step=[40, 80]) diff --git a/configs/recognition/tsm/tsm_r50_1x1x8_50e_jester_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x8_50e_jester_rgb.py index 2893df0bd1..4c1daf1d49 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x8_50e_jester_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x8_50e_jester_rgb.py @@ -60,7 +60,7 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), test_dataloader=dict(videos_per_gpu=1), train=dict( diff --git a/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py index 2e0a0520ac..76195eb83e 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py @@ -62,7 +62,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py index 4967fa23ac..e57a5b020c 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py @@ -53,10 +53,11 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -64,7 +65,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py b/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py index ba60196089..c51ac187c5 100644 --- a/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py +++ b/configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py @@ -53,10 +53,11 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -64,7 +65,8 @@ ] data = dict( videos_per_gpu=6, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py index abf672adc2..cac9dbb75c 100644 --- a/configs/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py @@ -85,7 +85,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py index 96c050633c..8955c8a74f 100644 --- a/configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py @@ -62,8 +62,9 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, - val_dataloader=dict(videos_per_gpu=4), + workers_per_gpu=2, + val_dataloader=dict(videos_per_gpu=1), + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py index 00f40cbd58..9b5199a7d0 100644 --- a/configs/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py @@ -57,10 +57,11 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -68,7 +69,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.py index d03ba632b4..11ae99c946 100644 --- a/configs/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.py @@ -58,10 +58,11 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -69,7 +70,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py index 9b600feda8..61004a5bd4 100644 --- a/configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py index 73d6321081..24864ec229 100644 --- a/configs/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py @@ -84,7 +84,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py index c4540ee855..7b39be4964 100644 --- a/configs/recognition/tsm/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py @@ -54,6 +54,7 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), @@ -65,7 +66,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_ptv_randaugment_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_ptv_randaugment_1x1x8_50e_sthv1_rgb.py index 75eac07094..a7a8346a78 100644 --- a/configs/recognition/tsm/tsm_r50_ptv_randaugment_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_ptv_randaugment_1x1x8_50e_sthv1_rgb.py @@ -54,6 +54,7 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), @@ -65,7 +66,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsm/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb.py index 448908c5b4..83ba457bb0 100644 --- a/configs/recognition/tsm/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsm/tsm_r50_randaugment_1x1x8_50e_sthv1_rgb.py @@ -54,10 +54,11 @@ clip_len=1, frame_interval=1, num_clips=8, + twice_sample=True, test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -65,7 +66,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_video_1x1x16_50e_diving48_rgb.py b/configs/recognition/tsm/tsm_r50_video_1x1x16_50e_diving48_rgb.py index ee348e8999..6871f53817 100644 --- a/configs/recognition/tsm/tsm_r50_video_1x1x16_50e_diving48_rgb.py +++ b/configs/recognition/tsm/tsm_r50_video_1x1x16_50e_diving48_rgb.py @@ -71,7 +71,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_diving48_rgb.py b/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_diving48_rgb.py index 3d4a439439..65609d21ec 100644 --- a/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_diving48_rgb.py +++ b/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_diving48_rgb.py @@ -69,7 +69,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_kinetics400_rgb.py b/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_kinetics400_rgb.py index 9cc6fc34fc..3e34c822c9 100644 --- a/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_kinetics400_rgb.py +++ b/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_kinetics400_rgb.py @@ -65,7 +65,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/custom_backbones/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/custom_backbones/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb.py index 41f1257364..d4b5051083 100644 --- a/configs/recognition/tsn/custom_backbones/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/custom_backbones/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb.py @@ -72,7 +72,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/custom_backbones/tsn_rn101_32x4d_320p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/custom_backbones/tsn_rn101_32x4d_320p_1x1x3_100e_kinetics400_rgb.py index e0d249f63f..978cb5bc9d 100644 --- a/configs/recognition/tsn/custom_backbones/tsn_rn101_32x4d_320p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/custom_backbones/tsn_rn101_32x4d_320p_1x1x3_100e_kinetics400_rgb.py @@ -79,7 +79,8 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/custom_backbones/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/custom_backbones/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb.py index 296aa194b1..dfe70170ee 100644 --- a/configs/recognition/tsn/custom_backbones/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/custom_backbones/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb.py @@ -74,8 +74,8 @@ ] data = dict( videos_per_gpu=24, - workers_per_gpu=4, - test_dataloader=dict(videos_per_gpu=4), + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_256p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_256p_1x1x3_100e_kinetics400_rgb.py index 10e74c3791..bb0a5fe333 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_256p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_256p_1x1x3_100e_kinetics400_rgb.py @@ -62,7 +62,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py index ab6a31d04d..6b77944ee0 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py @@ -62,7 +62,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py index 6ab7806e35..897fb05f90 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py @@ -62,7 +62,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_256p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_256p_1x1x3_100e_kinetics400_rgb.py index 061cc6db3d..3d9e8ca547 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_256p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_256p_1x1x3_100e_kinetics400_rgb.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py index 957dd20165..c35a32e4e7 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py index 02dccc7f3d..968bfc6f38 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_10crop_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_10crop_100e_kinetics400_rgb.py index 378572d72f..bb4da3990f 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_10crop_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_10crop_100e_kinetics400_rgb.py @@ -22,7 +22,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), test=dict( type=dataset_type, ann_file=ann_file_test, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_3crop_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_3crop_100e_kinetics400_rgb.py index 4f9f39073c..82f1d3eabe 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_3crop_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_test_256p_1x1x25_3crop_100e_kinetics400_rgb.py @@ -22,7 +22,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), test=dict( type=dataset_type, ann_file=ann_file_test, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py index fa84d042d5..74aeac51e0 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py @@ -22,7 +22,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), test=dict( type=dataset_type, ann_file=ann_file_test, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py index 8f37ff246c..ba35eb5922 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py @@ -22,7 +22,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), test=dict( type=dataset_type, ann_file=ann_file_test, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py index eb38dc9f29..ad900cd342 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py @@ -22,7 +22,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), test=dict( type=dataset_type, ann_file=ann_file_test, diff --git a/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py b/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py index 95584c8ef6..980259ecbd 100644 --- a/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/data_benchmark/tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py @@ -22,7 +22,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), test=dict( type=dataset_type, ann_file=ann_file_test, diff --git a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_action_rgb.py b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_action_rgb.py index 067063dfd6..77df841a88 100644 --- a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_action_rgb.py +++ b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_action_rgb.py @@ -71,6 +71,7 @@ data = dict( videos_per_gpu=32, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_attribute_rgb.py b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_attribute_rgb.py index 2e8369a79e..bdee7f32b7 100644 --- a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_attribute_rgb.py +++ b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_attribute_rgb.py @@ -71,6 +71,7 @@ data = dict( videos_per_gpu=32, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_concept_rgb.py b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_concept_rgb.py index 4f33d7706b..11b369bb0f 100644 --- a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_concept_rgb.py +++ b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_concept_rgb.py @@ -71,6 +71,7 @@ data = dict( videos_per_gpu=32, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_event_rgb.py b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_event_rgb.py index be910fa5f4..bcf5a40171 100644 --- a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_event_rgb.py +++ b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_event_rgb.py @@ -71,6 +71,7 @@ data = dict( videos_per_gpu=32, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_object_rgb.py b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_object_rgb.py index 4f33d7706b..11b369bb0f 100644 --- a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_object_rgb.py +++ b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_object_rgb.py @@ -71,6 +71,7 @@ data = dict( videos_per_gpu=32, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_scene_rgb.py b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_scene_rgb.py index fd088f8f82..305e9f27c1 100644 --- a/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_scene_rgb.py +++ b/configs/recognition/tsn/hvu/tsn_r18_1x1x8_100e_hvu_scene_rgb.py @@ -71,6 +71,7 @@ data = dict( videos_per_gpu=32, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/metafile.yml b/configs/recognition/tsn/metafile.yml index 4a60bd9261..700174155e 100644 --- a/configs/recognition/tsn/metafile.yml +++ b/configs/recognition/tsn/metafile.yml @@ -19,7 +19,7 @@ Models: - Dataset: UCF101 Metrics: Top 1 Accuracy: 83.03 - top5 accuracy: 96.78 + Top 5 Accuracy: 96.78 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb/tsn_r50_1x1x3_75e_ucf101_rgb_20201023.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb/tsn_r50_1x1x3_75e_ucf101_rgb_20201023.log @@ -41,7 +41,7 @@ Models: - Dataset: Diving48 Metrics: Top 1 Accuracy: 71.27 - top5 accuracy: 95.74 + Top 5 Accuracy: 95.74 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb/20210426_014138.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb/20210426_014138.log @@ -63,7 +63,7 @@ Models: - Dataset: Diving48 Metrics: Top 1 Accuracy: 76.75 - top5 accuracy: 96.95 + Top 5 Accuracy: 96.95 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb/20210426_014103.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb/20210426_014103.log @@ -85,7 +85,7 @@ Models: - Dataset: HMDB51 Metrics: Top 1 Accuracy: 48.95 - top5 accuracy: 80.19 + Top 5 Accuracy: 80.19 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb/20201025_231108.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb/20201025_231108.log @@ -107,7 +107,7 @@ Models: - Dataset: HMDB51 Metrics: Top 1 Accuracy: 56.08 - top5 accuracy: 84.31 + Top 5 Accuracy: 84.31 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_kinetics400_rgb/20201108_190805.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_kinetics400_rgb/20201108_190805.log @@ -128,7 +128,7 @@ Models: - Dataset: HMDB51 Metrics: Top 1 Accuracy: 54.25 - top5 accuracy: 83.86 + Top 5 Accuracy: 83.86 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb/20201112_170135.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb/20201112_170135.log @@ -151,7 +151,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.6 - top5 accuracy: 89.26 + Top 5 Accuracy: 89.26 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/20200614_063526.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/20200614_063526.log @@ -174,7 +174,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.42 - top5 accuracy: 89.03 + Top 5 Accuracy: 89.03 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_256p_1x1x3_100e_kinetics400_rgb/20200725_031325.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_256p_1x1x3_100e_kinetics400_rgb/20200725_031325.log @@ -197,7 +197,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.18 - top5 accuracy: 89.1 + Top 5 Accuracy: 89.1 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb/20200627_105310.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb/20200627_105310.log @@ -220,7 +220,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.91 - top5 accuracy: 89.51 + Top 5 Accuracy: 89.51 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_f3_kinetics400_shortedge_70.9_89.5.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_f3_kinetics400_shortedge_70.9_89.5.log @@ -243,7 +243,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 55.7 - top5 accuracy: 79.85 + Top 5 Accuracy: 79.85 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow/tsn_r50_f3_kinetics400_flow_shortedge_55.7_79.9.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow/tsn_r50_f3_kinetics400_flow_shortedge_55.7_79.9.log @@ -266,7 +266,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 71.8 - top5 accuracy: 90.17 + Top 5 Accuracy: 90.17 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_256p_1x1x8_100e_kinetics400_rgb/20200815_173413.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_256p_1x1x8_100e_kinetics400_rgb/20200815_173413.log @@ -289,7 +289,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.41 - top5 accuracy: 90.55 + Top 5 Accuracy: 90.55 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb/tsn_r50_f8_kinetics400_shortedge_72.4_90.6.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb/tsn_r50_f8_kinetics400_shortedge_72.4_90.6.log @@ -312,7 +312,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 57.76 - top5 accuracy: 80.99 + Top 5 Accuracy: 80.99 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow/tsn_r50_f8_kinetics400_flow_shortedge_57.8_81.0.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow/tsn_r50_f8_kinetics400_flow_shortedge_57.8_81.0.log @@ -335,7 +335,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 71.11 - top5 accuracy: 90.04 + Top 5 Accuracy: 90.04 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb_20201014.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb_20201014.log @@ -358,7 +358,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.77 - top5 accuracy: 89.3 + Top 5 Accuracy: 89.3 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb/20200606_003901.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb/20200606_003901.log @@ -381,7 +381,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 71.79 - top5 accuracy: 90.25 + Top 5 Accuracy: 90.25 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb/tsn_r50_video_2d_1x1x8_100e_kinetics400_rgb.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb/tsn_r50_video_2d_1x1x8_100e_kinetics400_rgb.log @@ -404,7 +404,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 70.4 - top5 accuracy: 89.12 + Top 5 Accuracy: 89.12 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb/tsn_r50_video_2d_1x1x8_dense_100e_kinetics400_rgb.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb/tsn_r50_video_2d_1x1x8_dense_100e_kinetics400_rgb.log @@ -427,7 +427,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.43 - top5 accuracy: 91.01 + Top 5 Accuracy: 91.01 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/custom_backbones/tsn_rn101_32x4d_320p_1x1x3_100e_kinetics400_rgb.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/custom_backbones/tsn_rn101_32x4d_320p_1x1x3_100e_kinetics400_rgb.log @@ -450,7 +450,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 72.78 - top5 accuracy: 90.75 + Top 5 Accuracy: 90.75 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/custom_backbones/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/custom_backbones/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb/tsn_dense161_320p_1x1x3_100e_kinetics400_rgb.log @@ -472,7 +472,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.6 - top5 accuracy: 91.0 + Top 5 Accuracy: 91.0 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/tsn/omni/tsn_imagenet_pretrained_r50_omni_1x1x3_kinetics400_rgb_20200926-54192355.pth - Config: configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py @@ -492,7 +492,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 73.1 - top5 accuracy: 90.4 + Top 5 Accuracy: 90.4 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/tsn/omni/tsn_1G1B_pretrained_r50_without_omni_1x1x3_kinetics400_rgb_20200926-c133dd49.pth - Config: configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py @@ -512,7 +512,7 @@ Models: - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 75.7 - top5 accuracy: 91.9 + Top 5 Accuracy: 91.9 Task: Action Recognition Weights: https://download.openmmlab.com/mmaction/recognition/tsn/omni/tsn_1G1B_pretrained_r50_omni_1x1x3_kinetics400_rgb_20200926-2863fed0.pth - Config: configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py @@ -533,7 +533,7 @@ Models: - Dataset: Kinetics-600 Metrics: Top 1 Accuracy: 74.8 - top5 accuracy: 92.3 + Top 5 Accuracy: 92.3 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb/tsn_r50_video_1x1x8_100e_kinetics600_rgb_20201015.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb/tsn_r50_video_1x1x8_100e_kinetics600_rgb_20201015.log @@ -556,7 +556,7 @@ Models: - Dataset: Kinetics-700 Metrics: Top 1 Accuracy: 61.7 - top5 accuracy: 83.6 + Top 5 Accuracy: 83.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb/tsn_r50_video_1x1x8_100e_kinetics700_rgb_20201015.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb/tsn_r50_video_1x1x8_100e_kinetics700_rgb_20201015.log @@ -579,7 +579,7 @@ Models: - Dataset: SthV1 Metrics: Top 1 Accuracy: 18.55 - top5 accuracy: 44.8 + Top 5 Accuracy: 44.8 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb/tsn_r50_f8_sthv1_18.1_45.0.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb/tsn_sthv1.log @@ -602,7 +602,7 @@ Models: - Dataset: SthV1 Metrics: Top 1 Accuracy: 15.77 - top5 accuracy: 39.85 + Top 5 Accuracy: 39.85 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x16_50e_sthv1_rgb/20200614_211932.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x16_50e_sthv1_rgb/20200614_211932.log @@ -625,7 +625,7 @@ Models: - Dataset: SthV2 Metrics: Top 1 Accuracy: 32.97 - top5 accuracy: 63.62 + Top 5 Accuracy: 63.62 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb/20200915_114139.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb/20200915_114139.log @@ -648,7 +648,7 @@ Models: - Dataset: SthV2 Metrics: Top 1 Accuracy: 27.21 - top5 accuracy: 55.84 + Top 5 Accuracy: 55.84 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x16_50e_sthv2_rgb/20200917_105855.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x16_50e_sthv2_rgb/20200917_105855.log @@ -671,7 +671,7 @@ Models: - Dataset: MiT Metrics: Top 1 Accuracy: 26.84 - top5 accuracy: 51.6 + Top 5 Accuracy: 51.6 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x6_100e_mit_rgb/tsn_r50_f6_mit_26.8_51.6.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x6_100e_mit_rgb/tsn_mit.log @@ -716,7 +716,7 @@ Models: - Dataset: ActivityNet v1.3 Metrics: Top 1 Accuracy: 73.93 - top5 accuracy: 93.44 + Top 5 Accuracy: 93.44 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb/20210228_223327.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb/20210228_223327.log @@ -739,7 +739,7 @@ Models: - Dataset: ActivityNet v1.3 Metrics: Top 1 Accuracy: 76.9 - top5 accuracy: 94.47 + Top 5 Accuracy: 94.47 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb/20210217_181313.log.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb/20210217_181313.log @@ -762,7 +762,7 @@ Models: - Dataset: ActivityNet v1.3 Metrics: Top 1 Accuracy: 57.51 - top5 accuracy: 83.02 + Top 5 Accuracy: 83.02 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow/tsn_r50_320p_1x1x8_150e_activitynet_video_flow_20200804.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow/tsn_r50_320p_1x1x8_150e_activitynet_video_flow_20200804.log @@ -785,7 +785,7 @@ Models: - Dataset: ActivityNet v1.3 Metrics: Top 1 Accuracy: 59.51 - top5 accuracy: 82.69 + Top 5 Accuracy: 82.69 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow_20200804.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow_20200804.log @@ -941,10 +941,10 @@ Models: Training Resources: 8 GPUs Name: tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb Results: - - Dataset: Kinetics400 + - Dataset: Kinetics-400 Metrics: Top 1 Accuracy: 77.51 - top5 accuracy: 92.92 + Top 5 Accuracy: 92.92 Task: Action Recognition Training Json Log: https://download.openmmlab.com/mmaction/recognition/tsn/custom_backbones/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb.json Training Log: https://download.openmmlab.com/mmaction/recognition/tsn/custom_backbones/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb/tsn_swin_transformer_video_320p_1x1x3_100e_kinetics400_rgb.log diff --git a/configs/recognition/tsn/tsn_fp16_r50_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_fp16_r50_1x1x3_100e_kinetics400_rgb.py index a7ca319174..5f73da4ae0 100644 --- a/configs/recognition/tsn/tsn_fp16_r50_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_fp16_r50_1x1x3_100e_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r101_1x1x5_50e_mmit_rgb.py b/configs/recognition/tsn/tsn_r101_1x1x5_50e_mmit_rgb.py index f6a938a938..4fcaf4b796 100644 --- a/configs/recognition/tsn/tsn_r101_1x1x5_50e_mmit_rgb.py +++ b/configs/recognition/tsn/tsn_r101_1x1x5_50e_mmit_rgb.py @@ -20,10 +20,9 @@ dropout_ratio=0.5, init_std=0.01, multi_class=True, - label_smooth_eps=0)) -# model training and testing settings -train_cfg = None -test_cfg = dict(average_clips=None) + label_smooth_eps=0), + train_cfg=None, + test_cfg=dict(average_clips=None)) # dataset settings dataset_type = 'RawframeDataset' @@ -84,7 +83,8 @@ data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv1_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv1_rgb.py index 8ddb99f79e..9b5de9f691 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv1_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv1_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv2_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv2_rgb.py index 410dbe6b1b..1d8b3e0143 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv2_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x16_50e_sthv2_rgb.py @@ -29,7 +29,12 @@ dict(type='ToTensor', keys=['imgs', 'label']) ] val_pipeline = [ - dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16), + dict( + type='SampleFrames', + clip_len=1, + frame_interval=1, + num_clips=16, + test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), dict(type='CenterCrop', crop_size=224), @@ -39,10 +44,15 @@ dict(type='ToTensor', keys=['imgs']) ] test_pipeline = [ - dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16), + dict( + type='SampleFrames', + clip_len=1, + frame_interval=1, + num_clips=16, + test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='TenCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -50,7 +60,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py index 3705de2a2c..1eca1ae6aa 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py index 0c98df7039..e902eba955 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py @@ -57,7 +57,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x6_100e_mit_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x6_100e_mit_rgb.py index de706a4278..5f8a15419d 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x6_100e_mit_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x6_100e_mit_rgb.py @@ -64,7 +64,8 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb.py index 9b92b5cfad..b881817293 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_kinetics400_rgb.py index 93588f034f..6b3230ec2d 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_kinetics400_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py index 51fb545379..83081300ed 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py @@ -60,6 +60,7 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb.py index 54bfb8fb59..0147490a42 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb.py @@ -67,7 +67,8 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb.py b/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb.py index b9035e12b8..6b33b98a1e 100644 --- a/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb.py +++ b/configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb.py @@ -32,7 +32,12 @@ dict(type='ToTensor', keys=['imgs', 'label']) ] val_pipeline = [ - dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8), + dict( + type='SampleFrames', + clip_len=1, + frame_interval=1, + num_clips=8, + test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), dict(type='CenterCrop', crop_size=224), @@ -42,10 +47,15 @@ dict(type='ToTensor', keys=['imgs']) ] test_pipeline = [ - dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8), + dict( + type='SampleFrames', + clip_len=1, + frame_interval=1, + num_clips=8, + test_mode=True), dict(type='RawFrameDecode'), dict(type='Resize', scale=(-1, 256)), - dict(type='TenCrop', crop_size=224), + dict(type='ThreeCrop', crop_size=256), dict(type='Normalize', **img_norm_cfg), dict(type='FormatShape', input_format='NCHW'), dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), @@ -54,6 +64,7 @@ data = dict( videos_per_gpu=16, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py index 5ca6bf89d8..64554a7934 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py @@ -53,7 +53,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py b/configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py index 00b18daf9b..761d214aad 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py @@ -56,7 +56,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py index b3b341baa9..7641b9771f 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py @@ -56,7 +56,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py b/configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py index 710416878a..3ca87c708c 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py @@ -56,7 +56,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py b/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py index 5a019ab8ac..ebb9982850 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py b/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py index f5f39ad68c..dfab68032f 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py b/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py index 9321b6e3c4..7ccb2beed5 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py b/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py index 6c3bcc8f29..17f1a7e79c 100644 --- a/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py +++ b/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py b/configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py index fec95d8c2f..a64608acfe 100644 --- a/configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py +++ b/configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py @@ -32,6 +32,7 @@ data = dict( videos_per_gpu=1, workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), test=dict( type=dataset_type, ann_file=None, diff --git a/configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py index fa462447fe..e8e498e9df 100644 --- a/configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py @@ -65,8 +65,9 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, - val_dataloader=dict(videos_per_gpu=4), + workers_per_gpu=2, + val_dataloader=dict(videos_per_gpu=1), + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py index a177a0035f..70affa8382 100644 --- a/configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py @@ -62,7 +62,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py b/configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py index 9bea133f7b..a2a3e61e1c 100644 --- a/configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=4, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py index d9ba6bf97a..57a8614fd5 100644 --- a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py @@ -68,7 +68,8 @@ ] data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py index bec1b85f13..7e3cf98476 100644 --- a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py @@ -63,7 +63,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py index 80d54e2a54..687ce2018f 100644 --- a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py index 00e47c6431..62390025f4 100644 --- a/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py @@ -61,7 +61,8 @@ ] data = dict( videos_per_gpu=12, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py index 862d287899..ad67dcb74d 100644 --- a/configs/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py @@ -58,7 +58,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py index 824df78dc6..7c6d5e820e 100644 --- a/configs/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py @@ -63,8 +63,9 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, val_dataloader=dict(videos_per_gpu=1), + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb.py index fab95ceb14..c16f7a3001 100644 --- a/configs/recognition/tsn/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb.py @@ -13,10 +13,9 @@ spatial_type='avg', consensus=dict(type='AvgConsensus', dim=1), dropout_ratio=0.4, - init_std=0.01)) -# model training and testing settings -train_cfg = None -test_cfg = dict(average_clips=None) + init_std=0.01), + train_cfg=None, + test_cfg=dict(average_clips=None)) # dataset settings dataset_type = 'VideoDataset' data_root = 'data/kinetics400/videos_train' @@ -84,7 +83,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition/tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py b/configs/recognition/tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py index bef969ad45..4f5f2a3a03 100644 --- a/configs/recognition/tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py +++ b/configs/recognition/tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py @@ -83,7 +83,8 @@ ] data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, + test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition_audio/audioonly/audioonly_r50_64x1x1_100e_kinetics400_audio_feature.py b/configs/recognition_audio/audioonly/audioonly_r50_64x1x1_100e_kinetics400_audio_feature.py index cef00ef22b..d8be216e99 100644 --- a/configs/recognition_audio/audioonly/audioonly_r50_64x1x1_100e_kinetics400_audio_feature.py +++ b/configs/recognition_audio/audioonly/audioonly_r50_64x1x1_100e_kinetics400_audio_feature.py @@ -45,7 +45,7 @@ ] data = dict( videos_per_gpu=160, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition_audio/resnet/metafile.yml b/configs/recognition_audio/resnet/metafile.yml index 3891bd4b16..42ebc2bdce 100644 --- a/configs/recognition_audio/resnet/metafile.yml +++ b/configs/recognition_audio/resnet/metafile.yml @@ -18,7 +18,7 @@ Models: Top 1 Accuracy: 19.7 Top 1 Accuracy [w. RGB]: 71.5 Top 1 Accuracy delta [w. RGB]: 0.39 - top5 accuracy: 35.75 + Top 5 Accuracy: 35.75 top5 accuracy [w. RGB]: 90.18 top5 accuracy delta [w. RGB]: 0.14 Task: Action Recognition diff --git a/configs/recognition_audio/resnet/tsn_r18_64x1x1_100e_kinetics400_audio_feature.py b/configs/recognition_audio/resnet/tsn_r18_64x1x1_100e_kinetics400_audio_feature.py index e2fbd7c777..d8b5c1e6f3 100644 --- a/configs/recognition_audio/resnet/tsn_r18_64x1x1_100e_kinetics400_audio_feature.py +++ b/configs/recognition_audio/resnet/tsn_r18_64x1x1_100e_kinetics400_audio_feature.py @@ -56,7 +56,7 @@ ] data = dict( videos_per_gpu=320, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/recognition_audio/resnet/tsn_r50_64x1x1_100e_kinetics400_audio.py b/configs/recognition_audio/resnet/tsn_r50_64x1x1_100e_kinetics400_audio.py index f4e42f6cc4..a806dea747 100644 --- a/configs/recognition_audio/resnet/tsn_r50_64x1x1_100e_kinetics400_audio.py +++ b/configs/recognition_audio/resnet/tsn_r50_64x1x1_100e_kinetics400_audio.py @@ -51,7 +51,7 @@ ] data = dict( videos_per_gpu=320, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py index a9a3aa5f04..640c67485a 100644 --- a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py +++ b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py @@ -85,7 +85,7 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_limb.py b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_limb.py index 31cb0da388..978bb2adcf 100644 --- a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_limb.py +++ b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_limb.py @@ -91,7 +91,7 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_keypoint.py b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_keypoint.py index 3863ef7159..47e541115e 100644 --- a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_keypoint.py +++ b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_keypoint.py @@ -85,7 +85,7 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_limb.py b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_limb.py index 2c3f47682f..7e98d22dd6 100644 --- a/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_limb.py +++ b/configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu60_xsub_limb.py @@ -91,7 +91,7 @@ ] data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, test_dataloader=dict(videos_per_gpu=1), train=dict( type=dataset_type, diff --git a/demo/mmaction2_tutorial.ipynb b/demo/mmaction2_tutorial.ipynb index 8671ab34d4..24981795f4 100644 --- a/demo/mmaction2_tutorial.ipynb +++ b/demo/mmaction2_tutorial.ipynb @@ -840,7 +840,7 @@ "]\n", "data = dict(\n", " videos_per_gpu=2,\n", - " workers_per_gpu=4,\n", + " workers_per_gpu=2,\n", " train=dict(\n", " type='VideoDataset',\n", " ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n", diff --git a/demo/mmaction2_tutorial_zh-CN.ipynb b/demo/mmaction2_tutorial_zh-CN.ipynb index 0d7bf32b08..501f2b8d50 100644 --- a/demo/mmaction2_tutorial_zh-CN.ipynb +++ b/demo/mmaction2_tutorial_zh-CN.ipynb @@ -629,7 +629,7 @@ "]\n", "data = dict(\n", " videos_per_gpu=2,\n", - " workers_per_gpu=4,\n", + " workers_per_gpu=2,\n", " train=dict(\n", " type='VideoDataset',\n", " ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n", diff --git a/docs/tutorials/1_config.md b/docs/tutorials/1_config.md index 0133ca9ce9..20abeeba3d 100644 --- a/docs/tutorials/1_config.md +++ b/docs/tutorials/1_config.md @@ -370,7 +370,7 @@ which is convenient to conduct various experiments. ] data = dict( # Config of data videos_per_gpu=32, # Batch size of each single GPU - workers_per_gpu=4, # Workers to pre-fetch data for each single GPU + workers_per_gpu=2, # Workers to pre-fetch data for each single GPU train_dataloader=dict( # Additional config of train dataloader drop_last=True), # Whether to drop out the last batch of data in training val_dataloader=dict( # Additional config of validation dataloader @@ -593,7 +593,7 @@ We incorporate modular design into our config system, which is convenient to con data = dict( # Config of data videos_per_gpu=16, # Batch size of each single GPU - workers_per_gpu=4, # Workers to pre-fetch data for each single GPU + workers_per_gpu=2, # Workers to pre-fetch data for each single GPU val_dataloader=dict( # Additional config of validation dataloader videos_per_gpu=1), # Batch size of each single GPU during evaluation train=dict( # Training dataset config @@ -738,7 +738,7 @@ test_pipeline = [ data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/docs/tutorials/3_new_dataset.md b/docs/tutorials/3_new_dataset.md index 1b1d18d37e..223117aa57 100644 --- a/docs/tutorials/3_new_dataset.md +++ b/docs/tutorials/3_new_dataset.md @@ -128,7 +128,7 @@ ann_file_test = 'data/custom/custom_val_list.txt' ... data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/docs_zh_CN/tutorials/1_config.md b/docs_zh_CN/tutorials/1_config.md index a59d765786..d3f5ae9ad0 100644 --- a/docs_zh_CN/tutorials/1_config.md +++ b/docs_zh_CN/tutorials/1_config.md @@ -368,7 +368,7 @@ MMAction2 将模块化设计整合到配置文件系统中,以便执行各类 ] data = dict( # 数据的配置 videos_per_gpu=32, # 单个 GPU 的批大小 - workers_per_gpu=4, # 单个 GPU 的 dataloader 的进程 + workers_per_gpu=2, # 单个 GPU 的 dataloader 的进程 train_dataloader=dict( # 训练过程 dataloader 的额外设置 drop_last=True), # 在训练过程中是否丢弃最后一个批次 val_dataloader=dict( # 验证过程 dataloader 的额外设置 @@ -587,7 +587,7 @@ MMAction2 将模块化设计整合到配置文件系统中,以便于执行各 data = dict( # 数据的配置 videos_per_gpu=16, # 单个 GPU 的批大小 - workers_per_gpu=4, # 单个 GPU 的 dataloader 的进程 + workers_per_gpu=2, # 单个 GPU 的 dataloader 的进程 val_dataloader=dict( # 验证过程 dataloader 的额外设置 videos_per_gpu=1), # 单个 GPU 的批大小 train=dict( # 训练数据集的设置 @@ -729,7 +729,7 @@ test_pipeline = [ data = dict( videos_per_gpu=8, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/docs_zh_CN/tutorials/3_new_dataset.md b/docs_zh_CN/tutorials/3_new_dataset.md index 19402cb41e..172d73b00d 100644 --- a/docs_zh_CN/tutorials/3_new_dataset.md +++ b/docs_zh_CN/tutorials/3_new_dataset.md @@ -123,7 +123,7 @@ ann_file_test = 'data/custom/custom_val_list.txt' ... data = dict( videos_per_gpu=32, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/mmaction/datasets/ava_dataset.py b/mmaction/datasets/ava_dataset.py index b071698e34..547cd37204 100644 --- a/mmaction/datasets/ava_dataset.py +++ b/mmaction/datasets/ava_dataset.py @@ -165,15 +165,18 @@ def parse_img_record(self, img_records): while len(img_records) > 0: img_record = img_records[0] num_img_records = len(img_records) - selected_records = list( - filter( - lambda x: np.array_equal(x['entity_box'], img_record[ - 'entity_box']), img_records)) + + selected_records = [ + x for x in img_records + if np.array_equal(x['entity_box'], img_record['entity_box']) + ] + num_selected_records = len(selected_records) - img_records = list( - filter( - lambda x: not np.array_equal(x['entity_box'], img_record[ - 'entity_box']), img_records)) + img_records = [ + x for x in img_records if + not np.array_equal(x['entity_box'], img_record['entity_box']) + ] + assert len(img_records) + num_selected_records == num_img_records bboxes.append(img_record['entity_box']) diff --git a/mmaction/models/heads/misc_head.py b/mmaction/models/heads/misc_head.py index 88f9f203da..a2888a26d8 100644 --- a/mmaction/models/heads/misc_head.py +++ b/mmaction/models/heads/misc_head.py @@ -31,6 +31,7 @@ class ACRNHead(nn.Module): `requires_grad`. Default: dict(type='BN2d', requires_grad=True). act_cfg (dict): Config for activate layers. Default: dict(type='ReLU', inplace=True). + kwargs (dict): Other new arguments, to be compatible with MMDet update. """ def __init__(self, @@ -40,7 +41,8 @@ def __init__(self, num_convs=1, conv_cfg=dict(type='Conv3d'), norm_cfg=dict(type='BN3d', requires_grad=True), - act_cfg=dict(type='ReLU', inplace=True)): + act_cfg=dict(type='ReLU', inplace=True), + **kwargs): super().__init__() self.in_channels = in_channels diff --git a/tools/data/jester/README.md b/tools/data/jester/README.md index 7acdbe13d9..2e054ab33d 100644 --- a/tools/data/jester/README.md +++ b/tools/data/jester/README.md @@ -39,7 +39,7 @@ we add `"filename_tmpl='{:05}.jpg'"` to the dict of `data.train`, `data.val` and ``` data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/tools/data/jester/README_zh-CN.md b/tools/data/jester/README_zh-CN.md index fdfda97d65..4b3fb17f0b 100644 --- a/tools/data/jester/README_zh-CN.md +++ b/tools/data/jester/README_zh-CN.md @@ -39,7 +39,7 @@ cd $MMACTION2/tools/data/jester/ ```python data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/tools/data/sthv1/README.md b/tools/data/sthv1/README.md index 5b93de2a2f..75f4c11134 100644 --- a/tools/data/sthv1/README.md +++ b/tools/data/sthv1/README.md @@ -40,7 +40,7 @@ Since the prefix of official JPGs is "%05d.jpg" (e.g., "00001.jpg"), users need ``` data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train, diff --git a/tools/data/sthv1/README_zh-CN.md b/tools/data/sthv1/README_zh-CN.md index 262f51dabf..11cc9318be 100644 --- a/tools/data/sthv1/README_zh-CN.md +++ b/tools/data/sthv1/README_zh-CN.md @@ -38,7 +38,7 @@ cd $MMACTION2/tools/data/sthv1/ ``` data = dict( videos_per_gpu=16, - workers_per_gpu=4, + workers_per_gpu=2, train=dict( type=dataset_type, ann_file=ann_file_train,