From 6b5de3ea8a219156e63a3fb2397decf62b7bd8af Mon Sep 17 00:00:00 2001 From: lilin Date: Wed, 6 Sep 2023 21:57:01 +0800 Subject: [PATCH] [Fix] Fix feature extraction script --- tools/data/activitynet/README.md | 16 ++++++++-------- tools/data/activitynet/README_zh-CN.md | 16 ++++++++-------- .../activitynet/tsn_extract_flow_feat_config.py | 4 ++-- .../activitynet/tsn_extract_rgb_feat_config.py | 4 ++-- .../activitynet/tsn_extract_video_feat_config.py | 4 ++-- tools/data/hacs/write_feature_csv.py | 2 +- tools/misc/clip_feature_extraction.py | 10 +++++----- tools/misc/dist_clip_feature_extraction.sh | 5 +++-- 8 files changed, 31 insertions(+), 30 deletions(-) diff --git a/tools/data/activitynet/README.md b/tools/data/activitynet/README.md index 17daef6acd..aea9e8b084 100644 --- a/tools/data/activitynet/README.md +++ b/tools/data/activitynet/README.md @@ -109,28 +109,28 @@ After finetuning TSN on ActivityNet, you can use it to extract both RGB and Flow ```shell python ../../misc/clip_feature_extraction.py tsn_extract_rgb_feat_config.py \ - /path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_tarin_feat.pkl \ + /path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_tarin_feat \ --video-list ../../../data/ActivityNet/anet_train_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode python ../../misc/clip_feature_extraction.py tsn_extract_rgb_feat_config.py \ - path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_val_feat.pkl \ + path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_val_feat \ --video-list ../../../data/ActivityNet/anet_val_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode python ../../misc/clip_feature_extraction.py tsn_extract_flow_feat_config.py \ - /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_tarin_feat.pkl \ + /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_tarin_feat \ --video-list ../../../data/ActivityNet/anet_train_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode python ../../misc/clip_feature_extraction.py tsn_extract_flow_feat_config.py \ - /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_val_feat.pkl \ + /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_val_feat \ --video-list ../../../data/ActivityNet/anet_val_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode ``` After feature extraction, you can use our post processing scripts to concat RGB and Flow feature, generate the `100-t X 400-d` feature for Action Detection. diff --git a/tools/data/activitynet/README_zh-CN.md b/tools/data/activitynet/README_zh-CN.md index 83969041c2..a31263df4c 100644 --- a/tools/data/activitynet/README_zh-CN.md +++ b/tools/data/activitynet/README_zh-CN.md @@ -108,28 +108,28 @@ python generate_rawframes_filelist.py ```shell python ../../misc/clip_feature_extraction.py tsn_extract_rgb_feat_config.py \ - /path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_tarin_feat.pkl \ + /path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_tarin_feat \ --video-list ../../../data/ActivityNet/anet_train_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode python ../../misc/clip_feature_extraction.py tsn_extract_rgb_feat_config.py \ - path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_val_feat.pkl \ + path/to/rgb_checkpoint.pth ../../../data/ActivityNet/rgb_val_feat \ --video-list ../../../data/ActivityNet/anet_val_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode python ../../misc/clip_feature_extraction.py tsn_extract_flow_feat_config.py \ - /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_tarin_feat.pkl \ + /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_tarin_feat \ --video-list ../../../data/ActivityNet/anet_train_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode python ../../misc/clip_feature_extraction.py tsn_extract_flow_feat_config.py \ - /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_val_feat.pkl \ + /path/to/flow_checkpoint.pth ../../../data/ActivityNet/flow_val_feat \ --video-list ../../../data/ActivityNet/anet_val_video.txt \ --video-root ../../../data/ActivityNet/rawframes \ - --dump-score + --dump-score --long-video-mode ``` 在提取完特征后,用户可以使用后处理脚本整合 RGB 特征和光流特征,生成 `100-t X 400-d` 维度的特征用于时序动作检测。 diff --git a/tools/data/activitynet/tsn_extract_flow_feat_config.py b/tools/data/activitynet/tsn_extract_flow_feat_config.py index e09c3f99c1..d713de58c8 100644 --- a/tools/data/activitynet/tsn_extract_flow_feat_config.py +++ b/tools/data/activitynet/tsn_extract_flow_feat_config.py @@ -10,8 +10,8 @@ # dataset settings dataset_type = 'RawframeDataset' -data_root_val = 'data/kinetics400/rawframes_val' -ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +data_root_val = 'data/activitynet/rawframes_val' +ann_file_val = 'data/activitynet/activitynet_val_list_rawframes.txt' file_client_args = dict(io_backend='disk') diff --git a/tools/data/activitynet/tsn_extract_rgb_feat_config.py b/tools/data/activitynet/tsn_extract_rgb_feat_config.py index 803e031935..f64976ba1f 100644 --- a/tools/data/activitynet/tsn_extract_rgb_feat_config.py +++ b/tools/data/activitynet/tsn_extract_rgb_feat_config.py @@ -5,8 +5,8 @@ # dataset settings dataset_type = 'RawframeDataset' -data_root_val = 'data/kinetics400/rawframes_val' -ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' +data_root_val = 'data/activitynet/rawframes_val' +ann_file_val = 'data/activitynet/activitynet_val_list_rawframes.txt' file_client_args = dict(io_backend='disk') diff --git a/tools/data/activitynet/tsn_extract_video_feat_config.py b/tools/data/activitynet/tsn_extract_video_feat_config.py index ab815c1318..ed95e4f61e 100644 --- a/tools/data/activitynet/tsn_extract_video_feat_config.py +++ b/tools/data/activitynet/tsn_extract_video_feat_config.py @@ -5,8 +5,8 @@ # dataset settings dataset_type = 'VideoDataset' -data_root_val = 'data/kinetics400/videos_val' -ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt' +data_root_val = 'data/activitynet/videos_val' +ann_file_val = 'data/activitynet/activitynet_val_list_videos.txt' file_client_args = dict(io_backend='disk') diff --git a/tools/data/hacs/write_feature_csv.py b/tools/data/hacs/write_feature_csv.py index 9fb098b4bf..9f95f7e068 100644 --- a/tools/data/hacs/write_feature_csv.py +++ b/tools/data/hacs/write_feature_csv.py @@ -11,7 +11,7 @@ for feature, video in zip(features, video_list): video_id = video.split()[0].split('/')[1] csv_file = video_id.replace('mp4', 'csv') - feat = feature['pred_scores']['item'].numpy() + feat = feature['pred_score'].numpy() feat = feat.tolist() csv_path = f'{feature_dir}/{csv_file}' with open(csv_path, 'w') as f: diff --git a/tools/misc/clip_feature_extraction.py b/tools/misc/clip_feature_extraction.py index 332f60a961..71e8ff6528 100644 --- a/tools/misc/clip_feature_extraction.py +++ b/tools/misc/clip_feature_extraction.py @@ -5,6 +5,7 @@ from mmengine import dump, list_from_file, load from mmengine.config import Config, DictAction +from mmengine.dist.utils import is_main_process from mmengine.runner import Runner @@ -133,8 +134,7 @@ def merge_args(cfg, args): for idx, transform in enumerate(test_pipeline): if transform.type == 'SampleFrames': test_pipeline[idx]['twice_sample'] = False - # if transform.type in ['ThreeCrop', 'TenCrop']: - if transform.type == 'TenCrop': + if transform.type in ['ThreeCrop', 'TenCrop']: test_pipeline[idx].type = 'CenterCrop' # -------------------- pipeline settings -------------------- @@ -231,7 +231,7 @@ def preprocess(self, inputs, data_samples, training=False): def split_feats(args): total_feats = load(args.dump) if args.dump_score: - total_feats = [sample['pred_scores']['item'] for sample in total_feats] + total_feats = [sample['pred_score'] for sample in total_feats] video_list = list_from_file(args.video_list) video_list = [line.split(' ')[0] for line in video_list] @@ -258,8 +258,8 @@ def main(): # start testing runner.test() - - split_feats(args) + if is_main_process(): + split_feats(args) if __name__ == '__main__': diff --git a/tools/misc/dist_clip_feature_extraction.sh b/tools/misc/dist_clip_feature_extraction.sh index f5c7a1a607..b9529457cd 100644 --- a/tools/misc/dist_clip_feature_extraction.sh +++ b/tools/misc/dist_clip_feature_extraction.sh @@ -3,10 +3,11 @@ CONFIG=$1 CHECKPOINT=$2 GPUS=$3 +OUTPUT_DIR=$4 PORT=${PORT:-29500} PYTHONPATH="$(dirname $0)/../..":$PYTHONPATH \ # Arguments starting from the forth one are captured by ${@:4} python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ - $(dirname "$0")/clip_feature_extraction.py $CONFIG $CHECKPOINT \ - --launcher pytorch ${@:4} + $(dirname "$0")/clip_feature_extraction.py $CONFIG $CHECKPOINT $OUTPUT_DIR \ + --launcher pytorch ${@:5}