# encoding: utf-8
# cython: embedsignature=True
"""
This module contains the speed crucial Viterbi functionality for the
CRFBeatDetector plus some functions computing the distributions and
normalisation factors.

References
----------
.. [1] Filip Korzeniowski, Sebastian Böck and Gerhard Widmer,
       "Probabilistic Extraction of Beat Positions from a Beat Activation
       Function",
       Proceedings of the 15th International Society for Music Information
       Retrieval Conference (ISMIR), 2014.

"""

from __future__ import absolute_import, division, print_function

import numpy as np

cimport numpy as np
cimport cython

from numpy.math cimport INFINITY


def initial_distribution(num_states, interval):
    """
    Compute the initial distribution.

    Parameters
    ----------
    num_states : int
        Number of states in the model.
    interval : int
        Beat interval of the piece [frames].

    Returns
    -------
    numpy array
        Initial distribution of the model.
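
    Examples
    --------
    A small illustrative case: with six states and a beat interval of three
    frames, only the first three states receive (un-normalised) mass, i.e.
    the first beat must fall within the first ``interval`` frames:

    >>> initial_distribution(6, 3)
    array([1., 1., 1., 0., 0., 0.], dtype=float32)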
"""
# We leave the initial distribution un-normalised because we want the
# position of the first beat not to influence the probability of the
# beat sequence. Normalising would favour shorter intervals.
init_dist = np.ones(num_states, dtype=np.float32)
init_dist[interval:] = 0
return init_dist
def transition_distribution(interval, interval_sigma):
    """
    Compute the transition distribution between beats.

    Parameters
    ----------
    interval : int
        Interval of the piece [frames].
    interval_sigma : float
        Allowed deviation from the interval per beat.

    Returns
    -------
    numpy array
        Transition distribution between beats.
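
    Examples
    --------
    An illustrative call with arbitrary values (a dominant interval of 60
    frames, a sigma of 0.05). The distribution covers jumps of up to twice
    the interval and peaks at the interval itself:

    >>> trans = transition_distribution(60, 0.05)
    >>> trans.shape
    (120,)
    >>> int(np.argmax(trans))
    60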
"""
from scipy.stats import norm
move_range = np.arange(interval * 2, dtype=np.float)
# to avoid floating point hell due to np.log2(0)
move_range[0] = 0.000001
trans_dist = norm.pdf(np.log2(move_range),
loc=np.log2(interval),
scale=interval_sigma)
trans_dist /= trans_dist.sum()
return trans_dist.astype(np.float32)
def normalisation_factors(activations, transition_distribution):
    """
    Compute normalisation factors for model.

    Parameters
    ----------
    activations : numpy array
        Beat activation function of the piece.
    transition_distribution : numpy array
        Transition distribution of the model.

    Returns
    -------
    numpy array
        Normalisation factors for model.
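
    Examples
    --------
    A rough sketch with random activations standing in for a real beat
    activation function; one normalisation factor is returned per frame:

    >>> act = np.random.uniform(size=1000).astype(np.float32)
    >>> trans = transition_distribution(60, 0.05)
    >>> norm_fact = normalisation_factors(act, trans)
    >>> norm_fact.shape
    (1000,)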
"""
from scipy.ndimage.filters import correlate1d
return correlate1d(activations, transition_distribution,
mode='constant', cval=0,
origin=-int(transition_distribution.shape[0] / 2))
def best_sequence(activations, interval, interval_sigma):
    """
    Extract the best beat sequence for a piece with the Viterbi algorithm.

    Parameters
    ----------
    activations : numpy array
        Beat activation function of the piece.
    interval : int
        Beat interval of the piece.
    interval_sigma : float
        Allowed deviation from the interval per beat.

    Returns
    -------
    beat_pos : numpy array
        Extracted beat positions [frame indices].
    log_prob : float
        Log probability of the beat sequence.
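
    Examples
    --------
    A minimal synthetic sketch; in practice the activations come from a
    beat activation function, here peaks are placed by hand every 20 frames:

    >>> act = np.full(100, 0.01, dtype=np.float32)
    >>> act[10::20] = 1
    >>> path, log_prob = best_sequence(act, 20, 0.05)

    ``path`` should then contain the frame indices of the peaks
    (10, 30, ..., 90) and ``log_prob`` the log probability of picking them.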
"""
init = initial_distribution(activations.shape[0],
interval)
trans = transition_distribution(interval, interval_sigma)
norm_fact = normalisation_factors(activations, trans)
# ignore division by zero warnings when taking the logarithm of 0.0,
# the result -inf is fine anyways!
with np.errstate(divide='ignore'):
init = np.log(init)
trans = np.log(trans)
norm_fact = np.log(norm_fact)
log_act = np.log(activations)
return viterbi(init, trans, norm_fact, log_act, interval)
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
def viterbi(float [::1] pi, float[::1] transition, float[::1] norm_factor,
            float [::1] activations, int tau):
    """
    Viterbi algorithm to compute the most likely beat sequence from the
    given activations and the dominant interval.

    Parameters
    ----------
    pi : numpy array
        Initial distribution.
    transition : numpy array
        Transition distribution.
    norm_factor : numpy array
        Normalisation factors.
    activations : numpy array
        Beat activations.
    tau : int
        Dominant interval [frames].

    Returns
    -------
    beat_pos : numpy array
        Extracted beat positions [frame indices].
    log_prob : float
        Log probability of the beat sequence.
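
    Examples
    --------
    All inputs are expected as C-contiguous float32 arrays in the log
    domain; a sketch mirroring what ``best_sequence`` does, again with
    synthetic activations:

    >>> act = np.full(100, 0.01, dtype=np.float32)
    >>> act[10::20] = 1
    >>> tau = 20
    >>> trans = transition_distribution(tau, 0.05)
    >>> norm_fact = normalisation_factors(act, trans)
    >>> with np.errstate(divide='ignore'):
    ...     log_pi = np.log(initial_distribution(100, tau))
    ...     log_trans, log_norm = np.log(trans), np.log(norm_fact)
    >>> path, log_prob = viterbi(log_pi, log_trans, log_norm, np.log(act), tau)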
"""
# number of states
cdef int num_st = activations.shape[0]
# number of transitions
cdef int num_tr = transition.shape[0]
# number of beat variables
cdef int num_x = num_st / tau
# current viterbi variables
cdef float [::1] v_c = np.empty(num_st, dtype=np.float32)
# previous viterbi variables. will be initialized with prior (first beat)
cdef float [::1] v_p = np.empty(num_st, dtype=np.float32)
# back-tracking pointers;
cdef long [:, ::1] bps = np.empty((num_x - 1, num_st), dtype=np.int)
# back tracked path, a.k.a. path sequence
cdef long [::1] path = np.empty(num_x, dtype=np.int)
# counters etc.
cdef int k, i, j, next_state
cdef double new_prob, path_prob
# init first beat
for i in range(num_st):
v_p[i] = pi[i] + activations[i] + norm_factor[i]
# iterate over all beats; the 1st beat is given by prior
for k in range(num_x - 1):
# reset all current viterbi variables
v_c[:] = -INFINITY
# find the best transition for each state i
for i in range(num_st):
# j is the number of frames we look back
for j in range(min(i, num_tr)):
# Important remark: the actual computation we'd have to do here
# is v_p[i - j] + norm_factor[i - j] + transition[j] +
# activations[i].
#
# For speedup, we can add the activation after
# the loop, since it does not change with j. Additionally,
# if we immediately add the normalisation factor to v_c[i],
# we can skip adding norm_factor[i - j] for each v_p[i - j].
new_prob = v_p[i - j] + transition[j]
if new_prob > v_c[i]:
v_c[i] = new_prob
bps[k, i] = i - j
# Add activation and norm_factor. For the last random variable,
# we'll subtract norm_factor later when searching the maximum
v_c[i] += activations[i] + norm_factor[i]
v_p, v_c = v_c, v_p
# add the final best state to the path
path_prob = -INFINITY
for i in range(num_st):
# subtract the norm factor because they shouldn't have been added
# for the last random variable
v_p[i] -= norm_factor[i]
if v_p[i] > path_prob:
next_state = i
path_prob = v_p[i]
path[num_x - 1] = next_state
# track the path backwards
for i in range(num_x - 2, -1, -1):
next_state = bps[i, next_state]
path[i] = next_state
# return the best sequence and its log probability
return np.asarray(path), path_prob