-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathleast_distance_stage1_dtw.py
147 lines (119 loc) · 4.55 KB
/
least_distance_stage1_dtw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 1 22:38:53 2023
@author: Raghavan
This code checks whether MFCCs, together with a least-distance algorithm,
can be used to match a short piece of audio within a longer one.
This is stage 1, where the comparison is done in the time domain;
that is, a short duration of the audio is used as the query and a longer
duration is used as the reference.
In stage 0 the comparison was done in the MFCC domain, meaning a small
subset of MFCCs was queried against a superset of MFCCs.
"""
import librosa
import numpy as np
from scipy.spatial.distance import cdist
from math import inf
from typing import List, Tuple
# --- Tunable parameters ------------------------------------------------------
N_MFCC: int = 13  # number of MFCC coefficients computed per frame
NUM_WINDOWS_RANGE = 2  # multiplier applied to window_samples by the matching code

# Segment lengths, all in seconds.
REFERENCE_DURATION: int = 5
QUERY_DURATION: int = 2
QUERY_OFFSET_DURATION: int = 1

# Framing parameters, in samples.
window_samples: int = 2048
hop_samples: int = 512

file: str = r"data/ds1.wav"

# sr=None asks librosa to keep the file's native sampling rate.
y, sr = librosa.load(file, sr=None)

# Convert every duration from seconds to a whole number of samples.
reference_samples, query_samples, query_offset_samples = (
    int(sr * seconds)
    for seconds in (REFERENCE_DURATION, QUERY_DURATION, QUERY_OFFSET_DURATION)
)

# One-line summary of the derived sizes, useful when reading the output below.
_size_summary = (
    reference_samples,
    query_samples,
    query_offset_samples,
    hop_samples,
    window_samples,
)
print("Ref samples %d q %d q off %d hop %d win %d" % _size_summary)
# The audio is split into consecutive references of REFERENCE_DURATION secs.
# Inside each reference a query window of QUERY_DURATION secs is slid forward
# in steps of QUERY_OFFSET_DURATION secs, which yields
# (REFERENCE_DURATION - QUERY_DURATION) / QUERY_OFFSET_DURATION + 1 queries.
# With REFERENCE_DURATION = 5, QUERY_DURATION = 2 and
# QUERY_OFFSET_DURATION = 1 the queries are 0-2 sec, 1-3, 2-4 and 3-5.


def _segment_mfcc(samples):
    """Return the MFCCs of ``samples`` computed with the script's framing settings."""
    # center=False: frames are not padded/centred, so frame indices are
    # converted back to sample offsets below as index * hop_samples.
    return librosa.feature.mfcc(
        y=samples,
        sr=sr,
        n_mfcc=N_MFCC,
        n_fft=window_samples,
        hop_length=hop_samples,
        center=False,
    )


def _within_tolerance(sample, expected, tolerance):
    """Return True when ``expected - tolerance <= sample < expected + tolerance``."""
    return expected - tolerance <= sample < expected + tolerance


# There will not be an exact match since the query is cut in the time domain:
# e.g. a query covering 2-4 secs does not start or end on a hop boundary of
# the reference. A match is therefore accepted when both of its ends fall
# within NUM_WINDOWS_RANGE windows of the expected position.
tolerance_samples = window_samples * NUM_WINDOWS_RANGE

# Only full-length references are processed; a shorter trailing segment is
# left out because it may be smaller than n_fft. The "+ 1" keeps the final
# reference when the audio length is an exact multiple of reference_samples.
for i in range(0, len(y) - reference_samples + 1, reference_samples):
    mfcc_reference = _segment_mfcc(y[i : i + reference_samples])

    # The "+ 1" makes the last query (the one ending exactly at the end of the
    # reference) part of the sweep; range() excludes its stop value.
    last_query_start = i + reference_samples - query_samples
    for j in range(i, last_query_start + 1, query_offset_samples):
        mfcc_query = _segment_mfcc(y[j : j + query_samples])

        # subseq=True aligns the query against a sub-span of the reference.
        # Only the warping path is needed; the cost matrix is discarded.
        _, wp = librosa.sequence.dtw(mfcc_query, mfcc_reference, subseq=True)

        # Column 1 of the path holds reference frame indices; the last row is
        # taken as the beginning of the match and the first row as its end.
        start_index = wp[-1][1]
        end_index = wp[0][1]
        start_sample = start_index * hop_samples
        end_sample = end_index * hop_samples

        # Where the query really sits, as sample offsets inside the reference.
        expected_start = j - i
        expected_end = expected_start + query_samples

        # NOTE(review): end_sample is computed from the index of the last
        # matched frame, not from that frame's end, so it presumably lands
        # about one window before expected_end; the tolerance absorbs this.
        if _within_tolerance(
            start_sample, expected_start, tolerance_samples
        ) and _within_tolerance(end_sample, expected_end, tolerance_samples):
            continue

        # NOTE(review): the diagnostics below are assumed to belong to the
        # mismatch case only - confirm against the intended output.
        print(
            "*** i %d j %d exp st %d [diff %d] end %d [diff %d]"
            % (
                i,
                j,
                expected_start,
                abs(start_sample - expected_start),
                expected_end,
                abs(end_sample - expected_end),
            )
        )
        print(
            "start = %d(sec %f sample %d) end = %d (sec %f sample %d)"
            % (
                start_index,
                start_sample / sr,
                start_sample,
                end_index,
                end_sample / sr,
                end_sample,
            )
        )
        print(
            "Look for %d to %d against %d to %d"
            % (
                j,
                j + query_samples,
                i + start_sample,
                i + end_sample,
            )
        )

print("DONE :")