Skip to content

Commit

Permalink
Merge pull request #6 from mgonzs13/silero-vad-cpp
Browse files Browse the repository at this point in the history
Silero vad cpp
  • Loading branch information
mgonzs13 authored Dec 27, 2024
2 parents 8ad8369 + e16e9ab commit 218b40c
Show file tree
Hide file tree
Showing 30 changed files with 1,709 additions and 309 deletions.
8 changes: 2 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,12 @@ RUN apt-get update \
&& apt-get -y --quiet --no-install-recommends install \
gcc \
git \
wget \
portaudio19-dev \
python3 \
python3-pip
curl

WORKDIR /root/ros2_ws/src
RUN git clone https://github.com/mgonzs13/audio_common.git
WORKDIR /root/ros2_ws

RUN pip3 install -r src/requirements.txt
WORKDIR /root/ros2_ws
RUN rosdep install --from-paths src --ignore-src -r -y

# Install CUDA nvcc
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ To run whisper_ros with CUDA, first, you must install the [CUDA Toolkit](https:/
$ cd ~/ros2_ws/src
$ git clone https://github.com/mgonzs13/audio_common.git
$ git clone https://github.com/mgonzs13/whisper_ros.git
$ pip3 install -r whisper_ros/requirements.txt
$ cd ~/ros2_ws
$ rosdep install --from-paths src --ignore-src -r -y
$ colcon build --cmake-args -DGGML_CUDA=ON # add this for CUDA
Expand Down
37 changes: 37 additions & 0 deletions onnxruntime_vendor/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.8)
project(onnxruntime_vendor)

# Vendor package for ONNX Runtime.
#
# At build time this downloads the prebuilt ONNX Runtime release archive from
# GitHub, extracts it inside the build tree and installs its lib/ and include/
# directories so that downstream ament packages can find_package() this
# package and link against libonnxruntime.

# Version of the prebuilt ONNX Runtime release to vendor.
set(ONNXRUNTIME_VERSION "1.18.1")

# Select the release archive matching the target CPU. Anything that is not
# recognised as 64-bit ARM keeps the x64 archive used so far.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$")
  set(ONNXRUNTIME_ARCH "aarch64")
else()
  set(ONNXRUNTIME_ARCH "x64")
endif()

set(ONNXRUNTIME_ARCHIVE_NAME "onnxruntime-linux-${ONNXRUNTIME_ARCH}-${ONNXRUNTIME_VERSION}")
set(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/${ONNXRUNTIME_ARCHIVE_NAME}.tgz")

# ROS 2 package configuration
find_package(ament_cmake REQUIRED)

# Directory created inside the build tree when the archive is extracted.
set(ONNXRUNTIME_INSTALL_DIR "${CMAKE_BINARY_DIR}/${ONNXRUNTIME_ARCHIVE_NAME}")
# Downloaded archive and the stamp file marking a completed download + extract.
set(ONNXRUNTIME_TARBALL "${CMAKE_BINARY_DIR}/onnxruntime.tgz")
set(ONNXRUNTIME_STAMP "${CMAKE_BINARY_DIR}/${ONNXRUNTIME_ARCHIVE_NAME}.stamp")

# curl performs the download. The REQUIRED keyword of find_program() only
# exists since CMake 3.18, so the result is checked explicitly to stay
# compatible with the declared minimum version.
find_program(CURL_EXECUTABLE curl)

if(NOT CURL_EXECUTABLE)
  message(FATAL_ERROR "curl is required to download ONNX Runtime but was not found.")
endif()

# Download and extract the archive as a real build rule: it only runs while
# the stamp file is missing, instead of re-downloading on every build.
#   --fail        make HTTP errors fail the build instead of saving an error
#                 page as the tarball
#   --silent --show-error
#                 no progress output, but errors stay visible
add_custom_command(
  OUTPUT "${ONNXRUNTIME_STAMP}"
  COMMAND "${CURL_EXECUTABLE}" --fail --silent --show-error --location
          --output "${ONNXRUNTIME_TARBALL}" "${ONNXRUNTIME_URL}"
  COMMAND "${CMAKE_COMMAND}" -E tar xzf "${ONNXRUNTIME_TARBALL}"
  COMMAND "${CMAKE_COMMAND}" -E touch "${ONNXRUNTIME_STAMP}"
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  COMMENT "Downloading and extracting ONNX Runtime ${ONNXRUNTIME_VERSION}"
  VERBATIM
)

# Part of the default build so the extracted tree exists before install runs.
add_custom_target(download_onnxruntime ALL
  DEPENDS "${ONNXRUNTIME_STAMP}"
)

# Install the ONNX Runtime library and include files
install(DIRECTORY "${ONNXRUNTIME_INSTALL_DIR}/lib" DESTINATION .)
install(DIRECTORY "${ONNXRUNTIME_INSTALL_DIR}/include" DESTINATION .)

# Export the onnxruntime library for downstream packages
ament_export_include_directories(include)
ament_export_libraries(onnxruntime)

# Export the package
ament_package()
18 changes: 18 additions & 0 deletions onnxruntime_vendor/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>onnxruntime_vendor</name>
<version>1.3.1</version>
<description>Vendor package for onnxruntime</description>
<maintainer email="[email protected]">Miguel Ángel González Santamarta</maintainer>
<license>MIT</license>

<buildtool_depend>ament_cmake</buildtool_depend>

<!-- CMakeLists.txt aborts the configure step when curl is missing, because
curl downloads the ONNX Runtime archive; declaring it lets rosdep install it. -->
<build_depend>curl</build_depend>

<test_depend>ament_lint_auto</test_depend>
<test_depend>ament_lint_common</test_depend>

<export>
<build_type>ament_cmake</build_type>
</export>
</package>
4 changes: 0 additions & 4 deletions requirements.txt

This file was deleted.

94 changes: 94 additions & 0 deletions whisper_bringup/launch/silero-vad.launch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# MIT License

# Copyright (c) 2023 Miguel Ángel González Santamarta

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from launch_ros.actions import Node
from launch import LaunchDescription, LaunchContext
from launch.substitutions import LaunchConfiguration
from launch.actions import OpaqueFunction, DeclareLaunchArgument
from huggingface_hub import hf_hub_download


def generate_launch_description():
    """Build the launch description that starts the Silero VAD node.

    Declares the ``model_repo``, ``model_filename`` and ``model_path`` launch
    arguments and defers node creation to an ``OpaqueFunction`` so the model
    location can be resolved once the argument values are known.
    """

    def run_silero_vad(context: LaunchContext, repo, file, model_path):
        """Resolve the model file and return the VAD node to launch.

        ``repo``, ``file`` and ``model_path`` are substitutions evaluated
        against ``context``. When the resulting model path is empty, the file
        is fetched with ``hf_hub_download`` from the given repo instead.
        """
        repo_id, filename, local_model = (
            str(context.perform_substitution(substitution))
            for substitution in (repo, file, model_path)
        )

        # An explicit local path wins; otherwise fall back to the Hugging Face hub.
        resolved_model = local_model or hf_hub_download(
            repo_id=repo_id, filename=filename, force_download=False
        )

        vad_parameters = {
            "enabled": LaunchConfiguration("enabled", default=True),
            "model_path": resolved_model,
        }
        # The remaining parameters are plain launch configurations with defaults.
        tunable_defaults = (
            ("sample_rate", 16000),
            ("frame_size_ms", 32),
            ("threshold", 0.5),
            ("min_silence_ms", 128),
            ("speech_pad_ms", 32),
        )
        for param_name, param_default in tunable_defaults:
            vad_parameters[param_name] = LaunchConfiguration(
                param_name, default=param_default
            )

        vad_node = Node(
            package="whisper_ros",
            executable="silero_vad_node",
            name="silero_vad_node",
            namespace="whisper",
            parameters=[vad_parameters],
            remappings=[("audio", "/audio/in")],
        )
        return (vad_node,)

    # Launch arguments, in the order they are added to the description.
    declared_arguments = [
        DeclareLaunchArgument(
            "model_repo",
            default_value="mgonzs13/silero-vad-onnx",
            description="Hugging Face model repo",
        ),
        DeclareLaunchArgument(
            "model_filename",
            default_value="silero_vad.onnx",
            description="Hugging Face model filename",
        ),
        DeclareLaunchArgument(
            "model_path",
            default_value="",
            description="Local path to the model file",
        ),
    ]

    # Substitutions handed to run_silero_vad, which resolves them at launch time.
    model_substitutions = [
        LaunchConfiguration(argument_name)
        for argument_name in ("model_repo", "model_filename", "model_path")
    ]

    return LaunchDescription(
        declared_arguments
        + [OpaqueFunction(function=run_silero_vad, args=model_substitutions)]
    )
76 changes: 54 additions & 22 deletions whisper_bringup/launch/whisper.launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
# SOFTWARE.


from launch import LaunchDescription, LaunchContext
import os
from launch_ros.actions import Node
from launch import LaunchDescription, LaunchContext
from launch.conditions import IfCondition, UnlessCondition
from launch.substitutions import LaunchConfiguration, PythonExpression
from launch.actions import OpaqueFunction, DeclareLaunchArgument
from launch.launch_description_sources import PythonLaunchDescriptionSource
from launch.actions import OpaqueFunction, DeclareLaunchArgument, IncludeLaunchDescription
from ament_index_python.packages import get_package_share_directory
from huggingface_hub import hf_hub_download
from launch.conditions import IfCondition, UnlessCondition


def generate_launch_description():
Expand Down Expand Up @@ -126,19 +129,42 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
model_repo_cmd = DeclareLaunchArgument(
"model_repo",
default_value="ggerganov/whisper.cpp",
description="Hugging Face model repo",
description="Hugging Face model repo for Whisper",
)

model_filename = LaunchConfiguration("model_filename")
model_filename_cmd = DeclareLaunchArgument(
"model_filename",
default_value="ggml-large-v3-turbo-q5_0.bin",
description="Hugging Face model filename",
description="Hugging Face model filename for Whisper",
)

model_path = LaunchConfiguration("model_path")
model_path_cmd = DeclareLaunchArgument(
"model_path", default_value="", description="Local path to the model file"
"model_path",
default_value="",
description="Local path to the model file for Whisper",
)

silero_vad_model_repo = LaunchConfiguration("silero_vad_model_repo")
silero_vad_model_repo_cmd = DeclareLaunchArgument(
"silero_vad_model_repo",
default_value="mgonzs13/silero-vad-onnx",
description="Hugging Face model repo for SileroVAD",
)

silero_vad_model_filename = LaunchConfiguration("silero_vad_model_filename")
silero_vad_model_filename_cmd = DeclareLaunchArgument(
"silero_vad_model_filename",
default_value="silero_vad.onnx",
description="Hugging Face model filename for SileroVAD",
)

silero_vad_model_path = LaunchConfiguration("silero_vad_model_path")
silero_vad_model_path_cmd = DeclareLaunchArgument(
"silero_vad_model_path",
default_value="",
description="Local path to the model file for SileroVAD",
)

return LaunchDescription(
Expand All @@ -147,24 +173,30 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
model_repo_cmd,
model_filename_cmd,
model_path_cmd,
silero_vad_model_repo_cmd,
silero_vad_model_filename_cmd,
silero_vad_model_path_cmd,
OpaqueFunction(
function=run_whisper, args=[model_repo, model_filename, model_path]
function=run_whisper,
args=[model_repo, model_filename, model_path],
),
Node(
package="whisper_ros",
executable="silero_vad_node",
name="silero_vad_node",
namespace="whisper",
parameters=[
{
"enabled": LaunchConfiguration(
"vad_enabled",
default=PythonExpression([LaunchConfiguration("stream")]),
),
"threshold": LaunchConfiguration("vad_threshold", default=0.5),
}
],
remappings=[("audio", "/audio/in")],
IncludeLaunchDescription(
PythonLaunchDescriptionSource(
os.path.join(
get_package_share_directory("whisper_bringup"),
"launch",
"silero-vad.launch.py",
)
),
launch_arguments={
"enabled": LaunchConfiguration(
"vad_enabled",
default=PythonExpression([LaunchConfiguration("stream")]),
),
"model_repo": silero_vad_model_repo,
"model_filename": silero_vad_model_filename,
"model_path": silero_vad_model_path,
}.items(),
),
Node(
package="audio_common",
Expand Down
40 changes: 34 additions & 6 deletions whisper_ros/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,24 @@ find_package(rclcpp_action REQUIRED)
find_package(rclcpp_lifecycle REQUIRED)
find_package(std_msgs REQUIRED)
find_package(std_srvs REQUIRED)
find_package(audio_common_msgs REQUIRED)
find_package(whisper_msgs REQUIRED)
find_package(whisper_cpp_vendor REQUIRED)
find_package(onnxruntime_vendor REQUIRED)
find_library(PORTAUDIO_LIB portaudio REQUIRED)

include_directories(include)
include_directories(
include
${PORTAUDIO_INCLUDE_DIR}
)

# whisper_node
add_executable(whisper_node
src/whisper_main.cpp
src/whisper_ros/whisper_node.cpp
src/whisper_ros/whisper_base_node.cpp
src/whisper_ros/whisper.cpp
src/whisper_utils/logs.cpp
)
target_link_libraries(whisper_node
whisper_cpp_vendor::grammar
Expand All @@ -36,11 +44,13 @@ ament_target_dependencies(whisper_node
whisper_cpp_vendor
)

# whisper_server_node
add_executable(whisper_server_node
src/whisper_server_main.cpp
src/whisper_ros/whisper_server_node.cpp
src/whisper_ros/whisper_base_node.cpp
src/whisper_ros/whisper.cpp
src/whisper_utils/logs.cpp
)
target_link_libraries(whisper_server_node
whisper_cpp_vendor::grammar
Expand All @@ -56,10 +66,29 @@ ament_target_dependencies(whisper_server_node
whisper_cpp_vendor
)

ament_export_dependencies(whisper_cpp_vendor)
# silero_vad_node
# Executable for the Silero VAD node, built from its own sources under
# src/silero_vad/ plus src/whisper_utils/logs.cpp, which the whisper_node and
# whisper_server_node targets in this file also compile.
add_executable(silero_vad_node
src/silero_vad_main.cpp
src/silero_vad/silero_vad_node.cpp
src/silero_vad/vad_iterator.cpp
src/silero_vad/timestamp.cpp
src/whisper_utils/logs.cpp
)
# Link the library located by find_library(PORTAUDIO_LIB portaudio ...) above.
# NOTE(review): no PRIVATE/PUBLIC keyword is used here; presumably this keeps
# the call compatible with the signature ament_target_dependencies uses
# internally - confirm before adding one.
target_link_libraries(silero_vad_node ${PORTAUDIO_LIB})
# Packages whose include directories and libraries ament attaches to the
# target; each one is located with find_package() earlier in this file.
ament_target_dependencies(silero_vad_node
rclcpp
rclcpp_lifecycle
std_msgs
std_srvs
audio_common_msgs
onnxruntime_vendor
)

# Export dependencies
ament_export_dependencies(whisper_cpp_vendor)
ament_export_dependencies(onnxruntime_vendor)

# INSTALL
# Install
install(TARGETS
whisper_node
DESTINATION lib/${PROJECT_NAME}
Expand All @@ -70,10 +99,9 @@ install(TARGETS
DESTINATION lib/${PROJECT_NAME}
)

install(PROGRAMS
whisper_ros/silero_vad_node.py
install(TARGETS
silero_vad_node
DESTINATION lib/${PROJECT_NAME}
RENAME silero_vad_node
)

ament_package()
Loading

0 comments on commit 218b40c

Please sign in to comment.