-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathfeatures.py
154 lines (125 loc) · 5.07 KB
/
features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python
__author__ = "Oriol Nieto"
__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
__license__ = "GPL"
__version__ = "1.0"
__email__ = "[email protected]"
import essentia
import essentia.standard as ES
import json
import logging
import os
import numpy as np
import utils
# Setup main params
SAMPLE_RATE = 44100
FRAME_SIZE = 2048
HOP_SIZE = 1024
#WINDOW_TYPE = "blackmanharris74"
WINDOW_TYPE = "hann"
OUTPUT_FEATURES = "features"
class STFTFeature:
"""Class to easily compute the features that require a frame based
spectrum process (or STFT)."""
def __init__(self, frame_size, hop_size, window_type, feature,
beats, sample_rate):
"""STFTFeature constructor."""
self.frame_size = frame_size
self.hop_size = hop_size
self.window_type = window_type
self.w = ES.Windowing(type=window_type)
self.spectrum = ES.Spectrum()
self.feature = feature # Essentia feature object
self.beats = beats
self.sample_rate = sample_rate
def compute_features(self, audio):
"""Computes the specified Essentia features from the audio array."""
features = []
for frame in ES.FrameGenerator(audio,
frameSize=self.frame_size, hopSize=self.hop_size):
if self.feature.name() == "MFCC":
bands, coeffs = self.feature(self.spectrum(self.w(frame)))
elif self.feature.name() == "HPCP":
spectral_peaks = ES.SpectralPeaks()
freqs, mags = spectral_peaks(self.spectrum(self.w(frame)))
coeffs = self.feature(freqs, mags)
features.append(coeffs)
# Convert to Essentia Numpy array
features = essentia.array(features)
if self.beats != []:
framerate = self.sample_rate / float(self.hop_size)
tframes = np.arange(features.shape[0]) / float(framerate)
features = utils.resample_mx(features.T, tframes, self.beats).T
return features
def compute_beats(audio):
"""Computes the beats using Essentia."""
logging.info("Computing Beats...")
ticks, conf = ES.BeatTrackerMultiFeature()(audio)
return ticks, conf
def compute_beatsync_features(ticks, audio):
"""Computes the HPCP and MFCC beat-synchronous features given a set
of beats (ticks)."""
MFCC = STFTFeature(FRAME_SIZE, HOP_SIZE, WINDOW_TYPE,
ES.MFCC(numberCoefficients=14), ticks, SAMPLE_RATE)
HPCP = STFTFeature(FRAME_SIZE, HOP_SIZE, WINDOW_TYPE, ES.HPCP(),
ticks, SAMPLE_RATE)
logging.info("Computing Beat-synchronous MFCCs...")
mfcc = MFCC.compute_features(audio)
logging.info("Computing Beat-synchronous HPCPs...")
hpcp = HPCP.compute_features(audio)
logging.info("Computing Beat-synchronous Tonnetz...")
tonnetz = utils.chroma_to_tonnetz(hpcp)
return mfcc.tolist(), hpcp.tolist(), tonnetz.tolist()
def list_to_array(features):
"""Convets the features to numpy arrays."""
for key in features.keys():
features[key] = np.asarray(features[key])
return features
def compute_all_features(audio_file, audio_beats=False):
"""Computes all the features for a specific audio file and its respective
human annotations.
Returns
-------
features : dict
Dictionary with the following features:
mfcc : np.array
Mel Frequency Cepstral Coefficients representation
hpcp : np.array
Harmonic Pitch Class Profiles
tonnets : np.array
Tonal Centroids (or Tonnetz)
"""
# Makes sure the output features folder exists
utils.ensure_dir(OUTPUT_FEATURES)
features_file = os.path.join(OUTPUT_FEATURES,
os.path.basename(audio_file) + ".json")
# If already precomputed, read and return
if os.path.exists(features_file):
with open(features_file, "r") as f:
features = json.load(f)
return list_to_array(features)
# Load Audio
logging.info("Loading audio file %s" % os.path.basename(audio_file))
audio = ES.MonoLoader(filename=audio_file, sampleRate=SAMPLE_RATE)()
duration = len(audio) / float(SAMPLE_RATE)
# Estimate Beats
features = {}
ticks, conf = compute_beats(audio)
ticks = np.concatenate(([0], ticks, [duration])) # Add first and last time
ticks = essentia.array(np.unique(ticks))
features["beats"] = ticks.tolist()
# Compute Beat-sync features
features["mfcc"], features["hpcp"], features["tonnetz"] = \
compute_beatsync_features(ticks, audio)
# Save output as audio file
if audio_beats:
logging.info("Saving Beats as an audio file")
marker = ES.AudioOnsetsMarker(onsets=ticks, type='beep',
sampleRate=SAMPLE_RATE)
marked_audio = marker(audio)
ES.MonoWriter(filename='beats.wav',
sampleRate=SAMPLE_RATE)(marked_audio)
# Save features
with open(features_file, "w") as f:
json.dump(features, f)
return list_to_array(features)