Skip to content

Commit

Permalink
Catch file not found errors and expand MFCC computation (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe authored Aug 25, 2023
1 parent 34d4999 commit 5737936
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 31 deletions.
12 changes: 12 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@
Changelog
=========

0.5.1
-----

- Added better error handling for creating archives for files that don't exist
- Added more input types for computing MFCCs

0.5.0
-----

- First official release
- Expanded functionality across all modules

0.1.0
------

Expand Down
38 changes: 29 additions & 9 deletions extensions/feat/feat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ template <class Feature>
void offline_feature(py::module& m, const std::string& feat_type) {
py::class_<OfflineFeatureTpl<Feature>>(m, feat_type.c_str())
.def(py::init<const typename Feature::Options&>())
.def("ComputeFeatures", &OfflineFeatureTpl<Feature>::ComputeFeatures)
.def("ComputeFeatures",
&OfflineFeatureTpl<Feature>::ComputeFeatures,
py::call_guard<py::gil_scoped_release>())
.def("compute", [](
const OfflineFeatureTpl<Feature>& v,
py::array_t<float> x
Expand All @@ -34,6 +36,16 @@ void offline_feature(py::module& m, const std::string& feat_type) {
v.Compute(vector, vtln_warp, &features);
return features;
})
// Overload of "compute" that takes a Vector<float> directly and returns the
// feature matrix produced with a fixed vtln_warp of 1.0.
.def("compute",
     [](const OfflineFeatureTpl<Feature>& computer,
        const Vector<float>& waveform) -> Matrix<float> {
       // The GIL stays released until this guard goes out of scope.
       py::gil_scoped_release release_guard;
       const float warp_factor = 1.0;
       Matrix<float> output;
       computer.Compute(waveform, warp_factor, &output);
       return output;
     })
.def("Dim", &OfflineFeatureTpl<Feature>::Dim);
}

Expand All @@ -46,23 +58,26 @@ void feat_signal(py::module& m){
"It is suggested to use the FFT-based convolution function which is more "
"efficient.",
py::arg("filter"),
py::arg("signal"));
py::arg("signal"),
py::call_guard<py::gil_scoped_release>());

m.def("FFTbasedConvolveSignals",
&FFTbasedConvolveSignals,
"This function implements FFT-based convolution of two signals. "
"However this should be an inefficient version of BlockConvolveSignals() "
"as it processes the entire signal with a single FFT.",
py::arg("filter"),
py::arg("signal"));
py::arg("signal"),
py::call_guard<py::gil_scoped_release>());

m.def("FFTbasedBlockConvolveSignals",
&FFTbasedBlockConvolveSignals,
"This function implements FFT-based block convolution of two signals using "
"overlap-add method. This is an efficient way to evaluate the discrete "
"convolution of a long signal with a finite impulse response filter.",
py::arg("filter"),
py::arg("signal"));
py::arg("signal"),
py::call_guard<py::gil_scoped_release>());
}

void feat_feat_functions(py::module& m){
Expand All @@ -76,7 +91,8 @@ void feat_feat_functions(py::module& m){
"this function computes in the first (n/2) + 1 elements of it, the "
"energies of the fft bins from zero to the Nyquist frequency. Contents of the "
"remaining (n/2) - 1 elements are undefined at output.",
py::arg("complex_fft"));
py::arg("complex_fft"),
py::call_guard<py::gil_scoped_release>());

py::class_<DeltaFeaturesOptions>(m, "DeltaFeaturesOptions")
.def(py::init<>())
Expand All @@ -97,7 +113,8 @@ void feat_feat_functions(py::module& m){
.def("Process", &DeltaFeatures::Process,
py::arg("input_feats"),
py::arg("frame"),
py::arg("output_frame"));
py::arg("output_frame"),
py::call_guard<py::gil_scoped_release>());

py::class_<ShiftedDeltaFeaturesOptions>(m, "ShiftedDeltaFeaturesOptions")
.def(py::init<>())
Expand Down Expand Up @@ -131,7 +148,8 @@ void feat_feat_functions(py::module& m){
"features coming in.",
py::arg("delta_opts"),
py::arg("input_features"),
py::arg("output_features"));
py::arg("output_features"),
py::call_guard<py::gil_scoped_release>());

m.def("compute_deltas",
[](
Expand Down Expand Up @@ -160,7 +178,8 @@ void feat_feat_functions(py::module& m){
"convenience, however, ShiftedDeltaFeatures can be used directly.",
py::arg("delta_opts"),
py::arg("input_features"),
py::arg("output_features"));
py::arg("output_features"),
py::call_guard<py::gil_scoped_release>());

m.def("SpliceFrames",
&SpliceFrames,
Expand All @@ -175,7 +194,8 @@ void feat_feat_functions(py::module& m){
py::arg("input_features"),
py::arg("left_context"),
py::arg("right_context"),
py::arg("output_features"));
py::arg("output_features"),
py::call_guard<py::gil_scoped_release>());

m.def("splice_frames",

Expand Down
48 changes: 48 additions & 0 deletions extensions/ivector/ivector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,54 @@ void pybind_plda(py::module &m) {
py::arg("ivectors"),
py::arg("num_examples")
)
// Scores one utterance i-vector against every enrolled i-vector, using the
// matching entry of num_enroll_utts for each enrolled i-vector.
// Returns one log-likelihood ratio per enrolled i-vector, in input order.
// Raises ValueError (py::value_error) if num_enroll_utts has fewer entries
// than transformed_enrolled_ivectors.
.def("score",
     [](PyClass &plda,
        const VectorBase<float> &utterance_ivector,
        const std::vector<Vector<float>> &transformed_enrolled_ivectors,
        std::vector<int32> num_enroll_utts) {
       const std::size_t num_enrolled = transformed_enrolled_ivectors.size();
       // Validate before releasing the GIL: indexing num_enroll_utts past
       // its end in the loop below would be undefined behaviour.
       if (num_enroll_utts.size() < num_enrolled) {
         throw py::value_error(
             "num_enroll_utts must have at least one entry per enrolled ivector");
       }

       py::gil_scoped_release gil_release;
       const Vector<double> utterance_dbl(utterance_ivector);

       std::vector<BaseFloat> scores;
       scores.reserve(num_enrolled);
       for (std::size_t j = 0; j < num_enrolled; ++j) {
         const Vector<double> enrolled_dbl(transformed_enrolled_ivectors[j]);
         // NOTE(review): Kaldi declares Plda::LogLikelihoodRatio as
         // (enroll_ivector, num_enroll_utts, test_ivector); the utterance
         // i-vector is passed first here -- confirm this order is intended.
         scores.push_back(plda.LogLikelihoodRatio(utterance_dbl,
                                                  num_enroll_utts[j],
                                                  enrolled_dbl));
       }
       return scores;
     },
     py::arg("utterance_ivector"),
     py::arg("transformed_enrolled_ivectors"),
     py::arg("num_enroll_utts"))
// Scores one utterance i-vector against every enrolled i-vector, passing a
// count of 1 for each enrolled i-vector.
// Returns one log-likelihood ratio per enrolled i-vector, in input order.
.def("score",
     [](PyClass &plda,
        const VectorBase<float> &utterance_ivector,
        const std::vector<Vector<float>> &transformed_enrolled_ivectors) {
       py::gil_scoped_release gil_release;
       const Vector<double> utterance_dbl(utterance_ivector);

       std::vector<BaseFloat> scores;
       scores.reserve(transformed_enrolled_ivectors.size());
       // Range-for avoids the signed/unsigned comparison of an int32 index
       // against size().
       for (const Vector<float> &enrolled : transformed_enrolled_ivectors) {
         const Vector<double> enrolled_dbl(enrolled);
         scores.push_back(plda.LogLikelihoodRatio(utterance_dbl,
                                                  1,
                                                  enrolled_dbl));
       }
       return scores;
     },
     py::arg("utterance_ivector"),
     py::arg("transformed_enrolled_ivectors"))
.def("TransformIvector",
py::overload_cast<const PldaConfig &,
const VectorBase<double> &,
Expand Down
3 changes: 3 additions & 0 deletions kalpy/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Data classes for kalpy"""
from __future__ import annotations

import os.path
import pathlib
import typing

Expand Down Expand Up @@ -110,6 +111,8 @@ class MatrixArchive:
"""

def __init__(self, file_name: typing.Union[pathlib.Path, str], double: bool = False):
if not os.path.exists(file_name):
raise OSError(f"Specified file does not exist: {file_name}")
self.file_name = str(file_name)
self.double = double
self.read_specifier = generate_read_specifier(file_name)
Expand Down
3 changes: 3 additions & 0 deletions kalpy/decoder/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Classes for storing graph archives"""
from __future__ import annotations

import os.path
import pathlib
import typing

Expand All @@ -19,6 +20,8 @@ class FstArchive:
"""

def __init__(self, file_name: typing.Union[pathlib.Path, str]):
if not os.path.exists(file_name):
raise OSError(f"Specified file does not exist: {file_name}")
self.file_name = str(file_name)
self.read_specifier = generate_read_specifier(file_name)
self.random_reader = RandomAccessVectorFstReader(self.read_specifier)
Expand Down
11 changes: 7 additions & 4 deletions kalpy/feat/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Classes for storing and processing features"""
from __future__ import annotations

import os
import pathlib
import typing

Expand Down Expand Up @@ -35,6 +36,11 @@ def __init__(
sliding_cmvn_center_window: bool = True,
double: bool = False,
):
self.cmvn_reader = None
self.transform_reader = None
self.vad_reader = None
if not os.path.exists(file_name):
raise OSError(f"Specified file does not exist: {file_name}")
self.archive = MatrixArchive(file_name, double=double)
self.utt2spk = utt2spk
self.subsample_n = subsample_n
Expand All @@ -52,9 +58,6 @@ def __init__(
self.splice_frames = splice_frames
self.use_deltas = deltas
self.use_splices = splices
self.cmvn_reader = None
self.transform_reader = None
self.vad_reader = None
self.cmvn_file_name = cmvn_file_name
if cmvn_file_name:
cmvn_read_specifier = generate_read_specifier(cmvn_file_name)
Expand Down Expand Up @@ -84,7 +87,7 @@ def __del__(self):
self.close()

def close(self):
if self.archive.random_reader.IsOpen():
if getattr(self, "archive", None) is not None and self.archive.random_reader.IsOpen():
self.archive.random_reader.Close()
if self.cmvn_reader is not None and self.cmvn_reader.IsOpen():
self.cmvn_reader.Close()
Expand Down
46 changes: 28 additions & 18 deletions kalpy/feat/mfcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import numpy as np

from _kalpy import feat
from _kalpy.matrix import CompressedMatrix, FloatMatrixBase
from _kalpy.matrix import CompressedMatrix, FloatMatrix, FloatVector
from _kalpy.util import BaseFloatMatrixWriter, CompressedMatrixWriter
from kalpy.data import Segment
from kalpy.utils import generate_write_specifier
Expand Down Expand Up @@ -170,7 +170,7 @@ def mfcc_obj(self):

def compute_mfccs(
self,
segment: Segment,
segment: typing.Union[Segment, np.ndarray],
) -> np.ndarray:
"""
Compute MFCC features for a segment
Expand All @@ -188,34 +188,44 @@ def compute_mfccs(
mfccs = self.compute_mfccs_for_export(segment, compress=False)
return mfccs.numpy()

def compute_mfccs_for_export(self, segment: Segment, compress: bool = True) -> FloatMatrixBase:
def compute_mfccs_for_export(
self, segment: typing.Union[Segment, np.ndarray, FloatVector], compress: bool = True
) -> FloatMatrix:
"""
Generate MFCCs for exporting to a kaldi archive
Parameters
----------
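segment: :class:`~kalpy.data.Segment`, :class:`numpy.ndarray`, or :class:`_kalpy.matrix.FloatVector`
Acoustic segment to load audio from, or an already-loaded waveform, to generate MFCCs for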
compress: bool, defaults to True
Flag for whether returned matrix should be compressed
Returns
-------
:class:`_kalpy.matrix.FloatMatrixBase`
:class:`_kalpy.matrix.FloatMatrix`
Feature matrix for the segment
"""
duration = None
if segment.end is not None and segment.begin is not None:
duration = segment.end - segment.begin
wave, sr = librosa.load(
segment.file_path,
sr=16000,
offset=segment.begin,
duration=duration,
mono=False,
)
wave = np.round(wave * 32768)
if len(wave.shape) == 2:
channel = 0 if segment.channel is None else segment.channel
wave = wave[channel, :]
if isinstance(segment, Segment):
duration = None
if segment.end is not None and segment.begin is not None:
duration = segment.end - segment.begin
wave, sr = librosa.load(
segment.file_path,
sr=16000,
offset=segment.begin,
duration=duration,
mono=False,
)
wave = np.round(wave * 32768)
if len(wave.shape) == 2:
channel = 0 if segment.channel is None else segment.channel
wave = wave[channel, :]
else:
wave = segment
if isinstance(wave, np.ndarray) and np.max(wave) < 1.0:
wave = np.round(wave * 32768)

mfccs = self.mfcc_obj.compute(wave)
if compress:
mfccs = CompressedMatrix(mfccs)
Expand Down
5 changes: 5 additions & 0 deletions kalpy/gmm/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Data classes for GMM"""
from __future__ import annotations

import os
import pathlib
import sys
import typing
Expand Down Expand Up @@ -352,6 +353,8 @@ def __init__(
words_file_name: typing.Union[pathlib.Path, str] = None,
likelihood_file_name: typing.Union[pathlib.Path, str] = None,
):
if not os.path.exists(file_name):
raise OSError(f"Specified file does not exist: {file_name}")
self.file_name = str(file_name)
self.read_specifier = generate_read_specifier(file_name)

Expand Down Expand Up @@ -510,6 +513,8 @@ class LatticeArchive:
"""

def __init__(self, file_name: typing.Union[pathlib.Path, str], determinized: bool = True):
if not os.path.exists(file_name):
raise OSError(f"Specified file does not exist: {file_name}")
self.file_name = str(file_name)
self.determinized = determinized
self.read_specifier = generate_read_specifier(file_name)
Expand Down
5 changes: 5 additions & 0 deletions kalpy/ivector/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Data classes for GMM"""
from __future__ import annotations

import os
import typing

from _kalpy.matrix import FloatVector
Expand All @@ -25,6 +26,8 @@ class IvectorArchive:
"""

def __init__(self, file_name: PathLike, num_utterances_file_name: PathLike = None):
if not os.path.exists(file_name):
raise OSError(f"Specified file does not exist: {file_name}")
self.file_name = str(file_name)
self.num_utterances_file_name = num_utterances_file_name
self.read_specifier = generate_read_specifier(file_name)
Expand Down Expand Up @@ -90,6 +93,8 @@ class GselectArchive:
"""

def __init__(self, file_name: PathLike):
if not os.path.exists(file_name):
raise OSError(f"Specified file does not exist: {file_name}")
self.file_name = str(file_name)
self.read_specifier = generate_read_specifier(file_name)
self.random_reader = RandomAccessInt32VectorVectorReader(self.read_specifier)
Expand Down

0 comments on commit 5737936

Please sign in to comment.