# main.py (forked from v-iashin/video_features)
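
# A minimal usage sketch (the video path below is illustrative, not a file
# known to ship with this fork):
#   python main.py --feature_type i3d --device_ids 0 1 --video_paths ./sample/video.mp4
#   python main.py --feature_type vggish --pytorch --device_ids 0 --video_paths ./sample/video.mp4
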
import argparse

import torch

from utils.utils import form_list_from_user_input


def parallel_feature_extraction(args):
    '''Distributes the feature extraction in an embarrassingly parallel fashion. Specifically,
    it divides the dataset (a list of video paths) evenly among all specified devices.'''
    if args.feature_type == 'i3d':
        from models.i3d.extract_i3d import ExtractI3D  # imported here to avoid import errors
        extractor = ExtractI3D(args)
    elif args.feature_type == 'vggish':
        if args.pytorch:
            from models.vggish_torch.extract_vggish import ExtractVGGish
            print("Using the PyTorch implementation!")
        else:
            from models.vggish.extract_vggish import ExtractVGGish
        extractor = ExtractVGGish(args)

    # The indices correspond to the positions of the target videos in the
    # video_paths list. They are needed here because torch.nn.parallel.scatter
    # accepts only tensors, and no torch tensor dtype is suitable for strings
    # (video_paths). Also, a tensor carries a '.device' attribute, which lets
    # the extractor figure out which device it is running on.
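    # For example (illustrative values): video_paths = ['a.mp4', 'b.mp4']
    # gives indices = tensor([0, 1]); each replica receives a slice of the
    # indices and looks up its own videos in video_paths by position.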
    video_paths = form_list_from_user_input(args)

    if args.nocuda:
        print("Running without CUDA support!")
        indices = torch.arange(len(video_paths), device='cpu')
        extractor.forward(indices)
    else:
        extractor.cuda()
        indices = torch.arange(len(video_paths))
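        # e.g. (illustrative) four videos on device_ids [0, 1]: scatter splits
        # indices = tensor([0, 1, 2, 3]) into tensor([0, 1]) on cuda:0 and
        # tensor([2, 3]) on cuda:1, and parallel_apply then runs one extractor
        # replica per device on its chunk in a separate thread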
        replicas = torch.nn.parallel.replicate(extractor, args.device_ids[:len(indices)])
        inputs = torch.nn.parallel.scatter(indices, args.device_ids[:len(indices)])
        torch.nn.parallel.parallel_apply(replicas[:len(inputs)], inputs)

    # close the tqdm progress bar to avoid unexpected errors due to multi-threading
    extractor.progress.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Extract Features')
    # Main args
    parser.add_argument('--feature_type', required=True, choices=['i3d', 'vggish'])
    parser.add_argument('--video_paths', nargs='+', help='space-separated paths to videos')
    parser.add_argument('--file_with_video_paths', help='.txt file where each line is a path')
    parser.add_argument('--device_ids', type=int, nargs='+', help='space-separated device ids')
    parser.add_argument('--tmp_path', default='./tmp',
                        help='folder to store the extracted frames before the extraction')
    parser.add_argument('--keep_frames', dest='keep_frames', action='store_true', default=False,
                        help='keep the extracted frames after feature extraction')
    parser.add_argument('--on_extraction', default='print', choices=['print', 'save_numpy'],
                        help='what to do once a feature stack is extracted')
    parser.add_argument('--output_path', default='./output', help='where to store results if saved')
    # I3D options
    parser.add_argument('--pwc_path', default='./models/i3d/checkpoints/pwc_net.pt')
    parser.add_argument('--i3d_rgb_path', default='./models/i3d/checkpoints/i3d_rgb.pt')
    parser.add_argument('--i3d_flow_path', default='./models/i3d/checkpoints/i3d_flow.pt')
    parser.add_argument('--min_side_size', type=int, default=256, help='min(HEIGHT, WIDTH)')
    parser.add_argument('--extraction_fps', type=int, help='leave unspecified to use the original video fps')
    parser.add_argument('--stack_size', type=int, default=64, help='feature time span in frames')
    parser.add_argument('--step_size', type=int, default=64, help='feature step size in frames')
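    # e.g. with the defaults stack_size=64 and step_size=64, one feature is
    # produced per non-overlapping stack of 64 consecutive frames (an
    # illustrative reading of the two parameters above)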
    parser.add_argument(
        '--show_kinetics_pred', dest='show_kinetics_pred', action='store_true', default=False,
        help='show the Kinetics class predictions of the I3D model for each feature'
    )
    parser.add_argument('--kinetics_class_labels', default='./checkpoints/label_map.txt')
    # VGGish options
    parser.add_argument('--vggish_model_path', default='./models/{}/checkpoints/vggish_model.')
    parser.add_argument('--vggish_pca_path', default='./models/{}/checkpoints/vggish_postprocess.')
    parser.add_argument('--nocuda', dest='nocuda', action='store_true',
                        help='when set, the experiments are run without CUDA support')
    # TensorFlow or PyTorch implementation
    parser.add_argument('--pytorch', dest='pytorch', action='store_true', default=False,
                        help='run the PyTorch implementation; if not set, the default TensorFlow implementation is used')
    args = parser.parse_args()

    # interpolate the checkpoint paths to get the correct file extension and submodule directory
    if '{}' in args.vggish_model_path:
        args.vggish_model_path = str(args.vggish_model_path).format(
            "vggish_torch" if args.pytorch else "vggish") + ("pt" if args.pytorch else "ckpt")
    if '{}' in args.vggish_pca_path:
        args.vggish_pca_path = str(args.vggish_pca_path).format(
            "vggish_torch" if args.pytorch else "vggish") + ("pt" if args.pytorch else "npz")
    # some printing
    if args.on_extraction == 'save_numpy':
        print(f'Saving features to {args.output_path}')
    if args.keep_frames:
        print(f'Keeping temp files in {args.tmp_path}')

    parallel_feature_extraction(args)