Merge pull request #6 from mgonzs13/silero-vad-cpp

Silero vad cpp
mgonzs13 · Dec 27, 2024 · 218b40c · 218b40c
2 parents 8ad8369 + e16e9ab
commit 218b40c
Show file tree

Hide file tree

Showing 30 changed files with 1,709 additions and 309 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -11,16 +11,12 @@ RUN apt-get update \
     && apt-get -y --quiet --no-install-recommends install \
     gcc \
     git \
-    wget \
-    portaudio19-dev \
-    python3 \
-    python3-pip
+    curl
 
 WORKDIR /root/ros2_ws/src
 RUN git clone https://github.com/mgonzs13/audio_common.git
-WORKDIR /root/ros2_ws
 
-RUN pip3 install -r src/requirements.txt
+WORKDIR /root/ros2_ws
 RUN rosdep install --from-paths src --ignore-src -r -y
 
 # Install CUDA nvcc

diff --git a/README.md b/README.md
@@ -33,7 +33,6 @@ To run whisper_ros with CUDA, first, you must install the [CUDA Toolkit](https:/
 $ cd ~/ros2_ws/src
 $ git clone https://github.com/mgonzs13/audio_common.git
 $ git clone https://github.com/mgonzs13/whisper_ros.git
-$ pip3 install -r whisper_ros/requirements.txt
 $ cd ~/ros2_ws
 $ rosdep install --from-paths src --ignore-src -r -y
 $ colcon build --cmake-args -DGGML_CUDA=ON # add this for CUDA

diff --git a/onnxruntime_vendor/CMakeLists.txt b/onnxruntime_vendor/CMakeLists.txt
@@ -0,0 +1,61 @@
+cmake_minimum_required(VERSION 3.8)
+project(onnxruntime_vendor)
+
+# ROS 2 package configuration
+find_package(ament_cmake REQUIRED)
+
+# Version of the prebuilt ONNX Runtime release to vendor
+set(ONNXRUNTIME_VERSION "1.18.1")
+
+# Upstream publishes one Linux archive per CPU architecture; choose the one
+# matching the target instead of always fetching the x64 build.
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$")
+    set(ONNXRUNTIME_ARCH "aarch64")
+else()
+    set(ONNXRUNTIME_ARCH "x64")
+endif()
+
+set(ONNXRUNTIME_PACKAGE "onnxruntime-linux-${ONNXRUNTIME_ARCH}-${ONNXRUNTIME_VERSION}")
+set(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/${ONNXRUNTIME_PACKAGE}.tgz")
+
+# Locations inside the build tree: the downloaded archive, the extracted
+# release directory, and a stamp file written once both steps succeeded.
+set(ONNXRUNTIME_ARCHIVE "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.tgz")
+set(ONNXRUNTIME_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ONNXRUNTIME_PACKAGE}")
+set(ONNXRUNTIME_STAMP "${CMAKE_CURRENT_BINARY_DIR}/${ONNXRUNTIME_PACKAGE}.stamp")
+
+# curl performs the download. The REQUIRED keyword of find_program() needs
+# CMake >= 3.18, so check the result explicitly to honour the declared minimum.
+find_program(CURL_EXECUTABLE curl)
+
+if(NOT CURL_EXECUTABLE)
+    message(FATAL_ERROR "curl is required to download ONNX Runtime but was not found.")
+endif()
+
+# Download and extract only while the stamp file is missing, so rebuilds do
+# not hit the network again. --fail turns HTTP errors into a failing build
+# step and --show-error keeps the reason visible in the build log.
+add_custom_command(
+    OUTPUT "${ONNXRUNTIME_STAMP}"
+    COMMENT "Downloading and extracting ONNX Runtime ${ONNXRUNTIME_VERSION}"
+    COMMAND "${CURL_EXECUTABLE}" --fail --silent --show-error --location
+            --output "${ONNXRUNTIME_ARCHIVE}" "${ONNXRUNTIME_URL}"
+    COMMAND "${CMAKE_COMMAND}" -E tar xzf "${ONNXRUNTIME_ARCHIVE}"
+    COMMAND "${CMAKE_COMMAND}" -E touch "${ONNXRUNTIME_STAMP}"
+    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+    VERBATIM
+)
+
+# Part of the default build so the extracted files exist before the install step
+add_custom_target(download_onnxruntime ALL DEPENDS "${ONNXRUNTIME_STAMP}")
+
+# Install the ONNX Runtime library and include files
+install(DIRECTORY "${ONNXRUNTIME_INSTALL_DIR}/lib" DESTINATION .)
+install(DIRECTORY "${ONNXRUNTIME_INSTALL_DIR}/include" DESTINATION .)
+
+# Export the onnxruntime library for downstream packages
+ament_export_include_directories(include)
+ament_export_libraries(onnxruntime)
+
+# Export the package
+ament_package()
diff --git a/onnxruntime_vendor/package.xml b/onnxruntime_vendor/package.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>onnxruntime_vendor</name>
+  <version>1.3.1</version>
+  <description>Vendor package for onnxruntime</description>
+  <maintainer email="[email protected]">Miguel Ángel González Santamarta</maintainer>
+  <license>MIT</license>
+
+  <buildtool_depend>ament_cmake</buildtool_depend>
+
+  <!-- curl is run at build time to download the prebuilt ONNX Runtime archive -->
+  <build_depend>curl</build_depend>
+
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>ament_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/requirements.txt b/requirements.txt
diff --git a/whisper_bringup/launch/silero-vad.launch.py b/whisper_bringup/launch/silero-vad.launch.py
@@ -0,0 +1,94 @@
+# MIT License
+
+# Copyright (c) 2023  Miguel Ángel González Santamarta
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+from launch_ros.actions import Node
+from launch import LaunchDescription, LaunchContext
+from launch.substitutions import LaunchConfiguration
+from launch.actions import OpaqueFunction, DeclareLaunchArgument
+from huggingface_hub import hf_hub_download
+
+
+def generate_launch_description():
+    """Build the launch description that starts the Silero VAD node.
+
+    Declares the ``model_repo``, ``model_filename`` and ``model_path`` launch
+    arguments and defers node creation to an OpaqueFunction so their values
+    can be resolved when the launch file is executed.
+    """
+
+    def run_silero_vad(context: LaunchContext, repo, file, model_path):
+        """Resolve the model location and return the configured VAD node."""
+        # Turn the three substitutions into plain strings, in argument order
+        repo_id, filename, local_model = (
+            str(context.perform_substitution(sub)) for sub in (repo, file, model_path)
+        )
+
+        # Without an explicit local path, obtain the model file through
+        # hf_hub_download for the given repo and filename
+        if not local_model:
+            local_model = hf_hub_download(
+                repo_id=repo_id, filename=filename, force_download=False
+            )
+
+        # Node parameters; each LaunchConfiguration carries its own default
+        node_params = {
+            "enabled": LaunchConfiguration("enabled", default=True),
+            "model_path": local_model,
+            "sample_rate": LaunchConfiguration("sample_rate", default=16000),
+            "frame_size_ms": LaunchConfiguration("frame_size_ms", default=32),
+            "threshold": LaunchConfiguration("threshold", default=0.5),
+            "min_silence_ms": LaunchConfiguration("min_silence_ms", default=128),
+            "speech_pad_ms": LaunchConfiguration("speech_pad_ms", default=32),
+        }
+
+        vad_node = Node(
+            package="whisper_ros",
+            executable="silero_vad_node",
+            name="silero_vad_node",
+            namespace="whisper",
+            parameters=[node_params],
+            remappings=[("audio", "/audio/in")],
+        )
+
+        return (vad_node,)
+
+    # Launch arguments describing where the model comes from
+    declared_arguments = [
+        DeclareLaunchArgument(
+            "model_repo",
+            default_value="mgonzs13/silero-vad-onnx",
+            description="Hugging Face model repo",
+        ),
+        DeclareLaunchArgument(
+            "model_filename",
+            default_value="silero_vad.onnx",
+            description="Hugging Face model filename",
+        ),
+        DeclareLaunchArgument(
+            "model_path", default_value="", description="Local path to the model file"
+        ),
+    ]
+
+    # The node is created lazily, once the arguments above can be evaluated
+    vad_action = OpaqueFunction(
+        function=run_silero_vad,
+        args=[
+            LaunchConfiguration("model_repo"),
+            LaunchConfiguration("model_filename"),
+            LaunchConfiguration("model_path"),
+        ],
+    )
+
+    return LaunchDescription(declared_arguments + [vad_action])
diff --git a/whisper_bringup/launch/whisper.launch.py b/whisper_bringup/launch/whisper.launch.py
@@ -21,12 +21,15 @@
 # SOFTWARE.
 
 
-from launch import LaunchDescription, LaunchContext
+import os
 from launch_ros.actions import Node
+from launch import LaunchDescription, LaunchContext
+from launch.conditions import IfCondition, UnlessCondition
 from launch.substitutions import LaunchConfiguration, PythonExpression
-from launch.actions import OpaqueFunction, DeclareLaunchArgument
+from launch.launch_description_sources import PythonLaunchDescriptionSource
+from launch.actions import OpaqueFunction, DeclareLaunchArgument, IncludeLaunchDescription
+from ament_index_python.packages import get_package_share_directory
 from huggingface_hub import hf_hub_download
-from launch.conditions import IfCondition, UnlessCondition
 
 
 def generate_launch_description():
@@ -126,19 +129,42 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
     model_repo_cmd = DeclareLaunchArgument(
         "model_repo",
         default_value="ggerganov/whisper.cpp",
-        description="Hugging Face model repo",
+        description="Hugging Face model repo for Whisper",
     )
 
     model_filename = LaunchConfiguration("model_filename")
     model_filename_cmd = DeclareLaunchArgument(
         "model_filename",
         default_value="ggml-large-v3-turbo-q5_0.bin",
-        description="Hugging Face model filename",
+        description="Hugging Face model filename for Whisper",
     )
 
     model_path = LaunchConfiguration("model_path")
     model_path_cmd = DeclareLaunchArgument(
-        "model_path", default_value="", description="Local path to the model file"
+        "model_path",
+        default_value="",
+        description="Local path to the model file for Whisper",
+    )
+
+    silero_vad_model_repo = LaunchConfiguration("silero_vad_model_repo")
+    silero_vad_model_repo_cmd = DeclareLaunchArgument(
+        "silero_vad_model_repo",
+        default_value="mgonzs13/silero-vad-onnx",
+        description="Hugging Face model repo for SileroVAD",
+    )
+
+    silero_vad_model_filename = LaunchConfiguration("silero_vad_model_filename")
+    silero_vad_model_filename_cmd = DeclareLaunchArgument(
+        "silero_vad_model_filename",
+        default_value="silero_vad.onnx",
+        description="Hugging Face model filename for SileroVAD",
+    )
+
+    silero_vad_model_path = LaunchConfiguration("silero_vad_model_path")
+    silero_vad_model_path_cmd = DeclareLaunchArgument(
+        "silero_vad_model_path",
+        default_value="",
+        description="Local path to the model file for SileroVAD",
     )
 
     return LaunchDescription(
@@ -147,24 +173,30 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
             model_repo_cmd,
             model_filename_cmd,
             model_path_cmd,
+            silero_vad_model_repo_cmd,
+            silero_vad_model_filename_cmd,
+            silero_vad_model_path_cmd,
             OpaqueFunction(
-                function=run_whisper, args=[model_repo, model_filename, model_path]
+                function=run_whisper,
+                args=[model_repo, model_filename, model_path],
             ),
-            Node(
-                package="whisper_ros",
-                executable="silero_vad_node",
-                name="silero_vad_node",
-                namespace="whisper",
-                parameters=[
-                    {
-                        "enabled": LaunchConfiguration(
-                            "vad_enabled",
-                            default=PythonExpression([LaunchConfiguration("stream")]),
-                        ),
-                        "threshold": LaunchConfiguration("vad_threshold", default=0.5),
-                    }
-                ],
-                remappings=[("audio", "/audio/in")],
+            IncludeLaunchDescription(
+                PythonLaunchDescriptionSource(
+                    os.path.join(
+                        get_package_share_directory("whisper_bringup"),
+                        "launch",
+                        "silero-vad.launch.py",
+                    )
+                ),
+                launch_arguments={
+                    "enabled": LaunchConfiguration(
+                        "vad_enabled",
+                        default=PythonExpression([LaunchConfiguration("stream")]),
+                    ),
+                    "model_repo": silero_vad_model_repo,
+                    "model_filename": silero_vad_model_filename,
+                    "model_path": silero_vad_model_path,
+                }.items(),
             ),
             Node(
                 package="audio_common",

diff --git a/whisper_ros/CMakeLists.txt b/whisper_ros/CMakeLists.txt
@@ -12,16 +12,31 @@ find_package(rclcpp_action REQUIRED)
 find_package(rclcpp_lifecycle REQUIRED)
 find_package(std_msgs REQUIRED)
 find_package(std_srvs REQUIRED)
+find_package(audio_common_msgs REQUIRED)
 find_package(whisper_msgs REQUIRED)
 find_package(whisper_cpp_vendor REQUIRED)
+find_package(onnxruntime_vendor REQUIRED)
+find_library(PORTAUDIO_LIB portaudio REQUIRED)
+
+# find_library() only sets PORTAUDIO_LIB; locate the header directory as well
+# so that PORTAUDIO_INCLUDE_DIR used below is actually defined.
+find_path(PORTAUDIO_INCLUDE_DIR portaudio.h)
+if(NOT PORTAUDIO_INCLUDE_DIR)
+  message(FATAL_ERROR "portaudio.h not found; install the PortAudio development files")
+endif()
 
-include_directories(include)
+include_directories(
+  include
+  ${PORTAUDIO_INCLUDE_DIR}
+)
 
+# whisper_node
 add_executable(whisper_node
   src/whisper_main.cpp
   src/whisper_ros/whisper_node.cpp
   src/whisper_ros/whisper_base_node.cpp
   src/whisper_ros/whisper.cpp
+  src/whisper_utils/logs.cpp
 )
 target_link_libraries(whisper_node 
   whisper_cpp_vendor::grammar
@@ -36,11 +44,13 @@ ament_target_dependencies(whisper_node
   whisper_cpp_vendor
 )
 
+# whisper_server_node
 add_executable(whisper_server_node
   src/whisper_server_main.cpp
   src/whisper_ros/whisper_server_node.cpp
   src/whisper_ros/whisper_base_node.cpp
   src/whisper_ros/whisper.cpp
+  src/whisper_utils/logs.cpp
 )
 target_link_libraries(whisper_server_node
   whisper_cpp_vendor::grammar
@@ -56,10 +66,29 @@ ament_target_dependencies(whisper_server_node
   whisper_cpp_vendor
 )
 
-ament_export_dependencies(whisper_cpp_vendor)
+# silero_vad_node
+add_executable(silero_vad_node
+  src/silero_vad_main.cpp
+  src/silero_vad/silero_vad_node.cpp
+  src/silero_vad/vad_iterator.cpp
+  src/silero_vad/timestamp.cpp
+  src/whisper_utils/logs.cpp
+)
+target_link_libraries(silero_vad_node ${PORTAUDIO_LIB})
+ament_target_dependencies(silero_vad_node
+  rclcpp
+  rclcpp_lifecycle
+  std_msgs
+  std_srvs
+  audio_common_msgs
+  onnxruntime_vendor
+)
 
+# Export dependencies
+ament_export_dependencies(whisper_cpp_vendor)
+ament_export_dependencies(onnxruntime_vendor)
 
-# INSTALL
+# Install
 install(TARGETS
   whisper_node
   DESTINATION lib/${PROJECT_NAME}
@@ -70,10 +99,9 @@ install(TARGETS
   DESTINATION lib/${PROJECT_NAME}
 )
 
-install(PROGRAMS
-  whisper_ros/silero_vad_node.py
+install(TARGETS
+  silero_vad_node
   DESTINATION lib/${PROJECT_NAME}
-  RENAME silero_vad_node
 )
 
 ament_package()