Skip to content

Commit

Permalink
Add whisper logging to vad_iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
mgonzs13 committed Dec 27, 2024
1 parent 16bb406 commit eb6fae6
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
1 change: 1 addition & 0 deletions whisper_ros/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ add_executable(silero_vad_node
src/silero_vad/silero_vad_node.cpp
src/silero_vad/vad_iterator.cpp
src/silero_vad/timestamp.cpp
src/whisper_utils/logs.cpp
)
target_link_libraries(silero_vad_node ${PORTAUDIO_LIB})
ament_target_dependencies(silero_vad_node
Expand Down
16 changes: 13 additions & 3 deletions whisper_ros/src/silero_vad/vad_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <vector>

#include "silero_vad/vad_iterator.hpp"
#include "whisper_utils/logs.hpp"

using namespace silero_vad;

Expand All @@ -48,9 +49,11 @@ VadIterator::VadIterator(const std::string &model_path, int sample_rate,
try {
this->init_onnx_model(model_path);
} catch (const std::exception &e) {
throw std::runtime_error("Failed to initialize ONNX model: " +
std::string(e.what()));
WHISPER_LOG_ERROR("Failed to initialize ONNX model: %s", e.what());
return;
}

WHISPER_LOG_INFO("SileroVAD Iterator started");
}

void VadIterator::init_onnx_model(const std::string &model_path) {
Expand All @@ -77,6 +80,9 @@ void VadIterator::reset_states() {
}

Timestamp VadIterator::predict(const std::vector<float> &data) {

WHISPER_LOG_INFO("Processing audio data");

// Pre-fill input with context
this->input.clear();
this->input.insert(this->input.end(), this->context.begin(),
Expand Down Expand Up @@ -107,14 +113,16 @@ Timestamp VadIterator::predict(const std::vector<float> &data) {
this->ort_inputs.data(), this->ort_inputs.size(),
this->output_node_names.data(), this->output_node_names.size());
} catch (const std::exception &e) {
throw std::runtime_error("ONNX inference failed: " + std::string(e.what()));
WHISPER_LOG_ERROR("ONNX inference failed: %s", e.what());
return Timestamp(-1, -1, 0.0f);
}

// Process output
float speech_prob = this->ort_outputs[0].GetTensorMutableData<float>()[0];
float *updated_state = this->ort_outputs[1].GetTensorMutableData<float>();
std::copy(updated_state, updated_state + this->state.size(),
this->state.begin());
WHISPER_LOG_DEBUG("Speech probability %f", speech_prob);

// Update context with the last 64 samples of data
this->context.assign(data.end() - context_size, data.end());
Expand All @@ -131,6 +139,7 @@ Timestamp VadIterator::predict(const std::vector<float> &data) {
int start_timestwamp = this->current_sample - this->speech_pad_samples -
this->window_size_samples;
this->triggered = true;
WHISPER_LOG_DEBUG("Speech starts at %d", start_timestwamp);
return Timestamp(start_timestwamp, -1, speech_prob);
}
}
Expand All @@ -145,6 +154,7 @@ Timestamp VadIterator::predict(const std::vector<float> &data) {
this->temp_end + this->speech_pad_samples - this->window_size_samples;
this->triggered = false;
this->temp_end = 0;
WHISPER_LOG_DEBUG("Speech ends at %d", end_timestamp);
return Timestamp(-1, end_timestamp, speech_prob);
}
}
Expand Down

0 comments on commit eb6fae6

Please sign in to comment.