Catch file not found errors and expand MFCC computation (#3)

MontrealCorpusTools · Aug 25, 2023 · 5737936 · 5737936
1 parent 34d4999
commit 5737936
Show file tree

Hide file tree

Showing 9 changed files with 140 additions and 31 deletions.
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -4,6 +4,18 @@
 Changelog
 =========
 
+0.5.1
+-----
+
+- Creating an archive for a file that does not exist now raises ``OSError``
+- MFCC computation now accepts NumPy arrays and ``FloatVector`` input in addition to ``Segment`` objects
+
+0.5.0
+-----
+
+- First official release
+- Expanded functionality across all modules
+
 0.1.0
 ------
 

diff --git a/extensions/feat/feat.cpp b/extensions/feat/feat.cpp
@@ -19,7 +19,9 @@ template <class Feature>
 void offline_feature(py::module& m, const std::string& feat_type) {
   py::class_<OfflineFeatureTpl<Feature>>(m, feat_type.c_str())
       .def(py::init<const typename Feature::Options&>())
-      .def("ComputeFeatures", &OfflineFeatureTpl<Feature>::ComputeFeatures)
+      .def("ComputeFeatures",
+      &OfflineFeatureTpl<Feature>::ComputeFeatures,
+      py::call_guard<py::gil_scoped_release>())
       .def("compute", [](
             const OfflineFeatureTpl<Feature>& v,
             py::array_t<float> x
@@ -34,6 +36,16 @@ void offline_feature(py::module& m, const std::string& feat_type) {
         v.Compute(vector, vtln_warp, &features);
         return features;
       })
+      .def("compute", [](
+            const OfflineFeatureTpl<Feature>& v,
+            const Vector<float> &vector
+            ) -> Matrix<float> {
+          py::gil_scoped_release gil_release;
+        float vtln_warp = 1.0;
+        Matrix<float> features;
+        v.Compute(vector, vtln_warp, &features);
+        return features;
+      })
       .def("Dim", &OfflineFeatureTpl<Feature>::Dim);
 }
 
@@ -46,23 +58,26 @@ void feat_signal(py::module& m){
         "It is suggested to use the FFT-based convolution function which is more "
         "efficient.",
         py::arg("filter"),
-        py::arg("signal"));
+        py::arg("signal"),
+      py::call_guard<py::gil_scoped_release>());
 
   m.def("FFTbasedConvolveSignals",
         &FFTbasedConvolveSignals,
         "This function implements FFT-based convolution of two signals. "
         "However this should be an inefficient version of BlockConvolveSignals() "
         "as it processes the entire signal with a single FFT.",
         py::arg("filter"),
-        py::arg("signal"));
+        py::arg("signal"),
+      py::call_guard<py::gil_scoped_release>());
 
   m.def("FFTbasedBlockConvolveSignals",
         &FFTbasedBlockConvolveSignals,
         "This function implements FFT-based block convolution of two signals using "
         "overlap-add method. This is an efficient way to evaluate the discrete "
         "convolution of a long signal with a finite impulse response filter.",
         py::arg("filter"),
-        py::arg("signal"));
+        py::arg("signal"),
+      py::call_guard<py::gil_scoped_release>());
 }
 
 void feat_feat_functions(py::module& m){
@@ -76,7 +91,8 @@ void feat_feat_functions(py::module& m){
         "this function computes in the first (n/2) + 1 elements of it, the "
         "energies of the fft bins from zero to the Nyquist frequency.  Contents of the "
         "remaining (n/2) - 1 elements are undefined at output.",
-        py::arg("complex_fft"));
+        py::arg("complex_fft"),
+      py::call_guard<py::gil_scoped_release>());
 
   py::class_<DeltaFeaturesOptions>(m, "DeltaFeaturesOptions")
       .def(py::init<>())
@@ -97,7 +113,8 @@ void feat_feat_functions(py::module& m){
       .def("Process", &DeltaFeatures::Process,
         py::arg("input_feats"),
         py::arg("frame"),
-        py::arg("output_frame"));
+        py::arg("output_frame"),
+      py::call_guard<py::gil_scoped_release>());
 
   py::class_<ShiftedDeltaFeaturesOptions>(m, "ShiftedDeltaFeaturesOptions")
       .def(py::init<>())
@@ -131,7 +148,8 @@ void feat_feat_functions(py::module& m){
         "features coming in.",
         py::arg("delta_opts"),
         py::arg("input_features"),
-        py::arg("output_features"));
+        py::arg("output_features"),
+      py::call_guard<py::gil_scoped_release>());
 
   m.def("compute_deltas",
         [](
@@ -160,7 +178,8 @@ void feat_feat_functions(py::module& m){
         "convenience, however, ShiftedDeltaFeatures can be used directly.",
         py::arg("delta_opts"),
         py::arg("input_features"),
-        py::arg("output_features"));
+        py::arg("output_features"),
+      py::call_guard<py::gil_scoped_release>());
 
   m.def("SpliceFrames",
         &SpliceFrames,
@@ -175,7 +194,8 @@ void feat_feat_functions(py::module& m){
         py::arg("input_features"),
         py::arg("left_context"),
         py::arg("right_context"),
-        py::arg("output_features"));
+        py::arg("output_features"),
+      py::call_guard<py::gil_scoped_release>());
 
   m.def("splice_frames",
 

diff --git a/extensions/ivector/ivector.cpp b/extensions/ivector/ivector.cpp
@@ -736,6 +736,54 @@ void pybind_plda(py::module &m) {
         py::arg("ivectors"),
         py::arg("num_examples")
         )
+      .def("score",
+        [](
+            PyClass &plda,
+            const VectorBase<float> & utterance_ivector,
+            const std::vector<Vector<float>> &transformed_enrolled_ivectors,
+            std::vector<int32> num_enroll_utts
+        ){
+          py::gil_scoped_release gil_release;
+          PldaConfig plda_config;
+          Vector<double> ivector_one_dbl(utterance_ivector);
+
+          std::vector<BaseFloat> scores;
+
+          for (int32 j = 0; j < transformed_enrolled_ivectors.size(); j++) {
+            Vector<double> ivector_two_dbl(transformed_enrolled_ivectors[j]);
+            scores.push_back(plda.LogLikelihoodRatio(ivector_one_dbl,
+                                                  num_enroll_utts[j],
+                                                  ivector_two_dbl));
+          }
+          return scores;
+
+        },
+        py::arg("utterance_ivector"),
+        py::arg("transformed_enrolled_ivectors"),
+        py::arg("num_enroll_utts"))
+      .def("score",
+        [](
+            PyClass &plda,
+            const VectorBase<float> & utterance_ivector,
+            const std::vector<Vector<float>> &transformed_enrolled_ivectors
+        ){
+          py::gil_scoped_release gil_release;
+          PldaConfig plda_config;
+          Vector<double> ivector_one_dbl(utterance_ivector);
+
+          std::vector<BaseFloat> scores;
+
+          for (int32 j = 0; j < transformed_enrolled_ivectors.size(); j++) {
+            Vector<double> ivector_two_dbl(transformed_enrolled_ivectors[j]);
+            scores.push_back(plda.LogLikelihoodRatio(ivector_one_dbl,
+                                                  1,
+                                                  ivector_two_dbl));
+          }
+          return scores;
+
+        },
+        py::arg("utterance_ivector"),
+        py::arg("transformed_enrolled_ivectors"))
       .def("TransformIvector",
         py::overload_cast<const PldaConfig &,
                           const VectorBase<double> &,

diff --git a/kalpy/data.py b/kalpy/data.py
@@ -1,6 +1,7 @@
 """Data classes for kalpy"""
 from __future__ import annotations
 
+import os.path
 import pathlib
 import typing
 
@@ -110,6 +111,8 @@ class MatrixArchive:
     """
 
     def __init__(self, file_name: typing.Union[pathlib.Path, str], double: bool = False):
+        if not os.path.exists(file_name):
+            raise OSError(f"Specified file does not exist: {file_name}")
         self.file_name = str(file_name)
         self.double = double
         self.read_specifier = generate_read_specifier(file_name)

diff --git a/kalpy/decoder/data.py b/kalpy/decoder/data.py
@@ -1,6 +1,7 @@
 """Classes for storing graph archives"""
 from __future__ import annotations
 
+import os.path
 import pathlib
 import typing
 
@@ -19,6 +20,8 @@ class FstArchive:
     """
 
     def __init__(self, file_name: typing.Union[pathlib.Path, str]):
+        if not os.path.exists(file_name):
+            raise OSError(f"Specified file does not exist: {file_name}")
         self.file_name = str(file_name)
         self.read_specifier = generate_read_specifier(file_name)
         self.random_reader = RandomAccessVectorFstReader(self.read_specifier)

diff --git a/kalpy/feat/data.py b/kalpy/feat/data.py
@@ -1,6 +1,7 @@
 """Classes for storing and processing features"""
 from __future__ import annotations
 
+import os
 import pathlib
 import typing
 
@@ -35,6 +36,11 @@ def __init__(
         sliding_cmvn_center_window: bool = True,
         double: bool = False,
     ):
+        self.cmvn_reader = None
+        self.transform_reader = None
+        self.vad_reader = None
+        if not os.path.exists(file_name):
+            raise OSError(f"Specified file does not exist: {file_name}")
         self.archive = MatrixArchive(file_name, double=double)
         self.utt2spk = utt2spk
         self.subsample_n = subsample_n
@@ -52,9 +58,6 @@ def __init__(
         self.splice_frames = splice_frames
         self.use_deltas = deltas
         self.use_splices = splices
-        self.cmvn_reader = None
-        self.transform_reader = None
-        self.vad_reader = None
         self.cmvn_file_name = cmvn_file_name
         if cmvn_file_name:
             cmvn_read_specifier = generate_read_specifier(cmvn_file_name)
@@ -84,7 +87,7 @@ def __del__(self):
         self.close()
 
     def close(self):
-        if self.archive.random_reader.IsOpen():
+        if getattr(self, "archive", None) is not None and self.archive.random_reader.IsOpen():
             self.archive.random_reader.Close()
         if self.cmvn_reader is not None and self.cmvn_reader.IsOpen():
             self.cmvn_reader.Close()

diff --git a/kalpy/feat/mfcc.py b/kalpy/feat/mfcc.py
@@ -9,7 +9,7 @@
 import numpy as np
 
 from _kalpy import feat
-from _kalpy.matrix import CompressedMatrix, FloatMatrixBase
+from _kalpy.matrix import CompressedMatrix, FloatMatrix, FloatVector
 from _kalpy.util import BaseFloatMatrixWriter, CompressedMatrixWriter
 from kalpy.data import Segment
 from kalpy.utils import generate_write_specifier
@@ -170,7 +170,7 @@ def mfcc_obj(self):
 
     def compute_mfccs(
         self,
-        segment: Segment,
+        segment: typing.Union[Segment, np.ndarray],
     ) -> np.ndarray:
         """
         Compute MFCC features for a segment
@@ -188,34 +188,44 @@ def compute_mfccs(
         mfccs = self.compute_mfccs_for_export(segment, compress=False)
         return mfccs.numpy()
 
-    def compute_mfccs_for_export(self, segment: Segment, compress: bool = True) -> FloatMatrixBase:
+    def compute_mfccs_for_export(
+        self, segment: typing.Union[Segment, np.ndarray, FloatVector], compress: bool = True
+    ) -> FloatMatrix:
         """
         Generate MFCCs for exporting to a kaldi archive
 
         Parameters
         ----------
         segment: :class:`~kalpy.feat.mfcc.Segment`
             Acoustic segment to generate MFCCs
+        compress: bool, defaults to True
+            Flag for whether returned matrix should be compressed
 
         Returns
         -------
-        :class:`_kalpy.matrix.FloatMatrixBase`
+        :class:`_kalpy.matrix.FloatMatrix`
             Feature matrix for the segment
         """
-        duration = None
-        if segment.end is not None and segment.begin is not None:
-            duration = segment.end - segment.begin
-        wave, sr = librosa.load(
-            segment.file_path,
-            sr=16000,
-            offset=segment.begin,
-            duration=duration,
-            mono=False,
-        )
-        wave = np.round(wave * 32768)
-        if len(wave.shape) == 2:
-            channel = 0 if segment.channel is None else segment.channel
-            wave = wave[channel, :]
+        if isinstance(segment, Segment):
+            duration = None
+            if segment.end is not None and segment.begin is not None:
+                duration = segment.end - segment.begin
+            wave, sr = librosa.load(
+                segment.file_path,
+                sr=16000,
+                offset=segment.begin,
+                duration=duration,
+                mono=False,
+            )
+            wave = np.round(wave * 32768)
+            if len(wave.shape) == 2:
+                channel = 0 if segment.channel is None else segment.channel
+                wave = wave[channel, :]
+        else:
+            wave = segment
+            if isinstance(wave, np.ndarray) and np.max(wave) < 1.0:
+                wave = np.round(wave * 32768)
+
         mfccs = self.mfcc_obj.compute(wave)
         if compress:
             mfccs = CompressedMatrix(mfccs)

diff --git a/kalpy/gmm/data.py b/kalpy/gmm/data.py
@@ -1,6 +1,7 @@
 """Data classes for GMM"""
 from __future__ import annotations
 
+import os
 import pathlib
 import sys
 import typing
@@ -352,6 +353,8 @@ def __init__(
         words_file_name: typing.Union[pathlib.Path, str] = None,
         likelihood_file_name: typing.Union[pathlib.Path, str] = None,
     ):
+        if not os.path.exists(file_name):
+            raise OSError(f"Specified file does not exist: {file_name}")
         self.file_name = str(file_name)
         self.read_specifier = generate_read_specifier(file_name)
 
@@ -510,6 +513,8 @@ class LatticeArchive:
     """
 
     def __init__(self, file_name: typing.Union[pathlib.Path, str], determinized: bool = True):
+        if not os.path.exists(file_name):
+            raise OSError(f"Specified file does not exist: {file_name}")
         self.file_name = str(file_name)
         self.determinized = determinized
         self.read_specifier = generate_read_specifier(file_name)

diff --git a/kalpy/ivector/data.py b/kalpy/ivector/data.py
@@ -1,6 +1,7 @@
 """Data classes for GMM"""
 from __future__ import annotations
 
+import os
 import typing
 
 from _kalpy.matrix import FloatVector
@@ -25,6 +26,8 @@ class IvectorArchive:
     """
 
     def __init__(self, file_name: PathLike, num_utterances_file_name: PathLike = None):
+        if not os.path.exists(file_name):
+            raise OSError(f"Specified file does not exist: {file_name}")
         self.file_name = str(file_name)
         self.num_utterances_file_name = num_utterances_file_name
         self.read_specifier = generate_read_specifier(file_name)
@@ -90,6 +93,8 @@ class GselectArchive:
     """
 
     def __init__(self, file_name: PathLike):
+        if not os.path.exists(file_name):
+            raise OSError(f"Specified file does not exist: {file_name}")
         self.file_name = str(file_name)
         self.read_specifier = generate_read_specifier(file_name)
         self.random_reader = RandomAccessInt32VectorVectorReader(self.read_specifier)