torch_fft.py

import os
import random
import time

import numpy as np
import torch

def print_memory_usage(message=""):
    """Print current GPU memory usage."""
    allocated = torch.cuda.memory_allocated() / (1024 ** 2)
    reserved = torch.cuda.memory_reserved() / (1024 ** 2)
    print(f"{message} - Allocated: {allocated:.2f} MB, Reserved: {reserved:.2f} MB")

# Load videos stored as .npy arrays of shape (frames, features)
def load_videos(data_dir, num_videos=None):
    print_memory_usage('Before loading videos')
    video_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('.npy')]
    # Limit the number of videos if specified
    if num_videos is not None and num_videos < len(video_files):
        video_files = random.sample(video_files, num_videos)
    videos = [torch.tensor(np.load(file)) for file in video_files]
    print_memory_usage('After loading videos')
    return videos

# Normalize a batch of video sequences to zero mean and unit variance per
# video; the small epsilon guards against division by zero for constant inputs
def normalize(video_batch):
    return (video_batch - video_batch.mean(dim=(1, 2), keepdim=True)) / (video_batch.std(dim=(1, 2), keepdim=True) + 1e-8)

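# Usage sketch (assumed shape): a batch of 4 videos, 16 frames, 128 features.
#   x = torch.randn(4, 16, 128)
#   y = normalize(x)  # per-video mean ~ 0, std ~ 1
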
# Pad video sequences to the same temporal length within a batch
def pad_videos(video_batch):
    max_len = max(video.shape[0] for video in video_batch)  # Longest video in the batch
    padded_videos = []
    for video in video_batch:
        padding_size = max_len - video.shape[0]
        # Zero-pad along the frame (first) dimension; the pad tuple is
        # (feature_left, feature_right, frame_top, frame_bottom)
        padded_video = torch.nn.functional.pad(video, (0, 0, 0, padding_size), mode='constant', value=0)
        padded_videos.append(padded_video)
    return torch.stack(padded_videos)

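# Usage sketch: for t1 of shape (10, d) and t2 of shape (7, d),
#   pad_videos([t1, t2])  # -> tensor of shape (2, 10, d), t2 zero-padded
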
def fft_distance_2d_batch(video_batch1, video_batch2):
    # Normalize the video batches
    video_batch1 = normalize(video_batch1)
    video_batch2 = normalize(video_batch2)
    # Temporal and feature dimensions of each batch
    n1, d1 = video_batch1.shape[1], video_batch1.shape[2]
    n2, d2 = video_batch2.shape[1], video_batch2.shape[2]
    # The feature dimensions must match
    assert d1 == d2, "Feature dimensions must match."
    # Padded length for linear (non-circular) correlation along time
    pad_n = n1 + n2 - 1
    # 2D FFT across the entire video (frames, features)
    fft_video1 = torch.fft.fft2(video_batch1, s=(pad_n, d1))
    fft_video2 = torch.fft.fft2(video_batch2, s=(pad_n, d1))
    # Multiplying one spectrum by the conjugate of the other and inverting
    # yields the cross-correlation (correlation theorem)
    fft_mult = torch.fft.ifft2(fft_video1 * torch.conj(fft_video2))
    correlation_result = torch.real(fft_mult).sum(dim=2)  # Sum over the feature dimension
    # Peak correlation for each pair, taken across the temporal lags
    peak_values = torch.amax(correlation_result, dim=1)
    # Distance as the inverse of the peak value, so high similarity yields a small distance
    distances = 1 / (peak_values + 1e-10)  # Small constant avoids division by zero
    return distances

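# Usage sketch (assumed shapes): batches of 4 videos with 16 and 24 frames
# of 128 features each; the result is a (4,) tensor of pairwise distances.
#   a = torch.randn(4, 16, 128)
#   b = torch.randn(4, 24, 128)
#   d = fft_distance_2d_batch(a, b)  # d.shape == torch.Size([4])
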
# Compute the pairwise distance matrix for a list of videos, in batches
def cdist_fft_2d_batched(videos, batch_size=32):
    num_videos = len(videos)
    distance_matrix = torch.zeros((num_videos, num_videos), device=videos[0].device)
    print_memory_usage('After Forming Matrix')
    for i in range(num_videos):
        # Process the remaining videos against video i in batches
        for j in range(i + 1, num_videos, batch_size):
            end = min(j + batch_size, num_videos)
            batch_videos_j = pad_videos(videos[j:end])  # Pad videos in the batch
            batch_videos_i = pad_videos([videos[i]] * (end - j))  # Repeat video i to match
            # Compute distances for the batch
            distances = fft_distance_2d_batch(batch_videos_i, batch_videos_j)
            # Fill both halves of the symmetric matrix
            distance_matrix[i, j:end] = distances
            distance_matrix[j:end, i] = distances
        torch.cuda.empty_cache()
        print_memory_usage(f'Iteration {i}')
    return distance_matrix

# End-to-end pipeline: load videos, compute the distance matrix on the GPU
def cuda_fft_distances(data_dir, num_videos, batch=32):
    videos = load_videos(data_dir, num_videos)
    videos = [video.to('cuda') for video in videos]  # Move videos to the GPU
    print_memory_usage('Moved videos to GPU')
    distance_matrix = cdist_fft_2d_batched(videos, batch_size=batch)
    print_memory_usage('After Calculating Distance Matrix')
    time.sleep(2)  # Short pause before releasing cached memory
    torch.cuda.empty_cache()
    print_memory_usage('Emptied cache')
    return distance_matrix.to('cpu').numpy()
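
# Minimal smoke test; "./video_features" is a placeholder directory name and
# should point at .npy arrays of shape (frames, features). A sketch only,
# assuming a CUDA-capable device is available.
if __name__ == "__main__":
    if torch.cuda.is_available():
        dist = cuda_fft_distances("./video_features", num_videos=8, batch=4)
        print("Distance matrix shape:", dist.shape)
    else:
        print("CUDA is required for cuda_fft_distances.")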