diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7ee6981..b158d10 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,12 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Changelog for package ROS 2 Whisper ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +1.2.0 (2023-11-19) +------------------ +* `whisper_util`: Upgrade to `whisper.cpp` 1.5.0 release https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.0 (full CUDA backend) + 1.1.0 (2023-09-01) +------------------ * `whisper_demos`: Improved terminal output * `whisper_server`: Improved state machine diff --git a/README.md b/README.md index da5c790..eedc1b8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ colcon build --symlink-install --cmake-args -DWHISPER_CUBLAS=On ## Demos Run the inference nodes (this will download models to `$HOME/.cache/whisper.cpp`): ```shell -ros2 launch whisper_bringup bringup.launch.py n_thread:=8 +ros2 launch whisper_bringup bringup.launch.py n_threads:=4 ``` Run a client node (activated on space bar press): ```shell diff --git a/audio_listener/package.xml b/audio_listener/package.xml index 1173439..bee4f4e 100644 --- a/audio_listener/package.xml +++ b/audio_listener/package.xml @@ -2,7 +2,7 @@ audio_listener - 1.1.0 + 1.2.0 Audio common replica. 
mhubii MIT diff --git a/audio_listener/setup.py b/audio_listener/setup.py index 78dde99..40b1f93 100644 --- a/audio_listener/setup.py +++ b/audio_listener/setup.py @@ -4,7 +4,7 @@ setup( name=package_name, - version="1.1.0", + version="1.2.0", packages=find_packages(exclude=["test"]), data_files=[ ("share/ament_index/resource_index/packages", ["resource/" + package_name]), diff --git a/whisper_bringup/launch/bringup.launch.py b/whisper_bringup/launch/bringup.launch.py index 80c0eff..f24c670 100644 --- a/whisper_bringup/launch/bringup.launch.py +++ b/whisper_bringup/launch/bringup.launch.py @@ -19,6 +19,7 @@ def generate_launch_description() -> LaunchDescription: ld.add_action(WhisperServerMixin.arg_model_name()) ld.add_action(WhisperServerMixin.arg_n_threads()) ld.add_action(WhisperServerMixin.arg_language()) + ld.add_action(WhisperServerMixin.arg_use_gpu()) ld.add_action(WhisperServerMixin.arg_batch_capacity()) ld.add_action(WhisperServerMixin.arg_buffer_capacity()) ld.add_action(WhisperServerMixin.arg_carry_over_capacity()) @@ -30,6 +31,7 @@ def generate_launch_description() -> LaunchDescription: WhisperServerMixin.param_model_name(), WhisperServerMixin.param_n_threads(), WhisperServerMixin.param_language(), + WhisperServerMixin.param_use_gpu(), WhisperServerMixin.param_batch_capacity(), WhisperServerMixin.param_buffer_capacity(), WhisperServerMixin.param_carry_over_capacity(), diff --git a/whisper_bringup/package.xml b/whisper_bringup/package.xml index dea8663..f94d2a8 100644 --- a/whisper_bringup/package.xml +++ b/whisper_bringup/package.xml @@ -2,7 +2,7 @@ whisper_bringup - 1.1.0 + 1.2.0 TODO: Package description mhubii MIT diff --git a/whisper_cpp_vendor/CMakeLists.txt b/whisper_cpp_vendor/CMakeLists.txt index 4d3b8e5..504e8ed 100644 --- a/whisper_cpp_vendor/CMakeLists.txt +++ b/whisper_cpp_vendor/CMakeLists.txt @@ -10,8 +10,8 @@ include(FetchContent) find_package(ament_cmake REQUIRED) set(WHISPER_VERSION_MAJOR 1 CACHE STRING "Major whisper.cpp version.") 
-set(WHISPER_VERSION_MINOR 4 CACHE STRING "Minor whisper.cpp version.") -set(WHISPER_VERSION_PATCH 2 CACHE STRING "Patch whisper.cpp version.") +set(WHISPER_VERSION_MINOR 5 CACHE STRING "Minor whisper.cpp version.") +set(WHISPER_VERSION_PATCH 0 CACHE STRING "Patch whisper.cpp version.") FetchContent_Declare( whisper @@ -21,12 +21,23 @@ FetchContent_Declare( FetchContent_MakeAvailable(whisper) +####################################################################### # note that target properties need change as whisper.cpp CMake is buggy +####################################################################### set_target_properties( whisper PROPERTIES INTERFACE_INCLUDE_DIRECTORIES $ +) + +# install ggml header +install( + FILES ${whisper_SOURCE_DIR}/ggml.h + DESTINATION include ) +############## +# end of fixes +############## ament_export_targets(export_whisper HAS_LIBRARY_TARGET) diff --git a/whisper_cpp_vendor/package.xml b/whisper_cpp_vendor/package.xml index af45c55..1cf8d4d 100644 --- a/whisper_cpp_vendor/package.xml +++ b/whisper_cpp_vendor/package.xml @@ -2,7 +2,7 @@ whisper_cpp_vendor - 1.1.0 + 1.2.0 Vendor package for whisper.cpp. mhubii MIT diff --git a/whisper_demos/package.xml b/whisper_demos/package.xml index 7e8a69a..4cc79bc 100644 --- a/whisper_demos/package.xml +++ b/whisper_demos/package.xml @@ -2,7 +2,7 @@ whisper_demos - 1.1.0 + 1.2.0 Demos for using the ROS 2 whisper package. 
mhubii MIT diff --git a/whisper_demos/setup.py b/whisper_demos/setup.py index 5a1f143..d21b020 100644 --- a/whisper_demos/setup.py +++ b/whisper_demos/setup.py @@ -4,7 +4,7 @@ setup( name=package_name, - version="1.1.0", + version="1.2.0", packages=find_packages(exclude=["test"]), data_files=[ ("share/ament_index/resource_index/packages", ["resource/" + package_name]), diff --git a/whisper_msgs/package.xml b/whisper_msgs/package.xml index 3edc307..a1991c9 100644 --- a/whisper_msgs/package.xml +++ b/whisper_msgs/package.xml @@ -2,7 +2,7 @@ whisper_msgs - 1.1.0 + 1.2.0 Messages for the ROS 2 whisper package mhubii MIT diff --git a/whisper_server/config/whisper.yaml b/whisper_server/config/whisper.yaml index ef04f03..af89bd6 100644 --- a/whisper_server/config/whisper.yaml +++ b/whisper_server/config/whisper.yaml @@ -1,10 +1,11 @@ whisper: ros__parameters: # whisper - model_name: "tiny.en" # other models https://huggingface.co/ggerganov/whisper.cpp + model_name: "base.en" # other models https://huggingface.co/ggerganov/whisper.cpp language: "en" n_threads: 4 print_progress: false + use_gpu: true # buffer batch_capacity: 6 # seconds diff --git a/whisper_server/package.xml b/whisper_server/package.xml index 62e32eb..b8be3a4 100644 --- a/whisper_server/package.xml +++ b/whisper_server/package.xml @@ -2,7 +2,7 @@ whisper_server - 1.1.0 + 1.2.0 ROS 2 whisper.cpp inference server. 
mhubii MIT diff --git a/whisper_server/src/inference_node.cpp b/whisper_server/src/inference_node.cpp index c8e34b8..ce01cc9 100644 --- a/whisper_server/src/inference_node.cpp +++ b/whisper_server/src/inference_node.cpp @@ -42,12 +42,13 @@ void InferenceNode::declare_parameters_() { node_ptr_->declare_parameter("carry_over_capacity", 200); // whisper parameters - node_ptr_->declare_parameter("model_name", "tiny.en"); + node_ptr_->declare_parameter("model_name", "base.en"); // consider other parameters: // https://github.com/ggerganov/whisper.cpp/blob/a4bb2df36aeb4e6cfb0c1ca9fbcf749ef39cc852/whisper.h#L351 node_ptr_->declare_parameter("language", "en"); node_ptr_->declare_parameter("n_threads", 4); node_ptr_->declare_parameter("print_progress", false); + node_ptr_->declare_parameter("use_gpu", true); } void InferenceNode::initialize_whisper_() { @@ -71,9 +72,10 @@ void InferenceNode::initialize_whisper_() { RCLCPP_INFO(node_ptr_->get_logger(), "Model %s initialized.", model_name.c_str()); language_ = node_ptr_->get_parameter("language").as_string(); - whisper_->params.language = language_.c_str(); - whisper_->params.n_threads = node_ptr_->get_parameter("n_threads").as_int(); - whisper_->params.print_progress = node_ptr_->get_parameter("print_progress").as_bool(); + whisper_->wparams.language = language_.c_str(); + whisper_->wparams.n_threads = node_ptr_->get_parameter("n_threads").as_int(); + whisper_->wparams.print_progress = node_ptr_->get_parameter("print_progress").as_bool(); + whisper_->cparams.use_gpu = node_ptr_->get_parameter("use_gpu").as_bool(); // NOTE(review): assigned after the "Model %s initialized." log; if the context is already created by then, use_gpu never reaches it - confirm ordering } rcl_interfaces::msg::SetParametersResult @@ -81,9 +83,9 @@ InferenceNode::on_parameter_set_(const std::vector &parameter rcl_interfaces::msg::SetParametersResult result; for (const auto &parameter : parameters) { if (parameter.get_name() == "n_threads") { - whisper_->params.n_threads = parameter.as_int(); + whisper_->wparams.n_threads = parameter.as_int(); RCLCPP_INFO(node_ptr_->get_logger(), "Parameter %s set to %d.", 
parameter.get_name().c_str(), - whisper_->params.n_threads); + whisper_->wparams.n_threads); continue; } result.reason = "Parameter " + parameter.get_name() + " not handled."; @@ -143,7 +145,8 @@ void InferenceNode::on_inference_accepted_(const std::shared_ptrpublish_feedback(feedback); // update inference result - if (result->transcriptions.size() == batched_buffer_->batch_idx() + 1) { + if (result->transcriptions.size() == + static_cast(batched_buffer_->batch_idx() + 1)) { result->transcriptions[result->transcriptions.size() - 1] = feedback->transcription; } else { result->transcriptions.push_back(feedback->transcription); diff --git a/whisper_server/whisper_server_launch_mixin/whisper_server_mixin.py b/whisper_server/whisper_server_launch_mixin/whisper_server_mixin.py index b404422..b16a5ed 100644 --- a/whisper_server/whisper_server_launch_mixin/whisper_server_mixin.py +++ b/whisper_server/whisper_server_launch_mixin/whisper_server_mixin.py @@ -11,13 +11,14 @@ class InferenceMixin: def arg_model_name() -> DeclareLaunchArgument: return DeclareLaunchArgument( name="model_name", - default_value="tiny.en", + default_value="base.en", description="Model name for whisper.cpp. 
Refer to https://huggingface.co/ggerganov/whisper.cpp.", choices=[ "tiny.en", "tiny", "tiny.en", "base", + "base.en", "small.en", "small", "medium.en", @@ -44,6 +45,14 @@ def arg_language() -> DeclareLaunchArgument: choices=["en", "auto"], ) + @staticmethod + def arg_use_gpu() -> DeclareLaunchArgument: + return DeclareLaunchArgument( + name="use_gpu", + default_value="true", + description="Use GPU for inference.", + ) + @staticmethod def arg_batch_capacity() -> DeclareLaunchArgument: return DeclareLaunchArgument( @@ -70,7 +79,7 @@ def arg_carry_over_capacity() -> DeclareLaunchArgument: @staticmethod def param_model_name() -> Dict[str, LaunchConfiguration]: - return {"model_name": LaunchConfiguration("model_name", default="tiny.en")} + return {"model_name": LaunchConfiguration("model_name", default="base.en")} @staticmethod def param_n_threads() -> Dict[str, LaunchConfiguration]: @@ -80,6 +89,10 @@ def param_n_threads() -> Dict[str, LaunchConfiguration]: def param_language() -> Dict[str, LaunchConfiguration]: return {"language": LaunchConfiguration("language", default="en")} + @staticmethod + def param_use_gpu() -> Dict[str, LaunchConfiguration]: + return {"use_gpu": LaunchConfiguration("use_gpu", default="true")} + @staticmethod def param_batch_capacity() -> Dict[str, LaunchConfiguration]: return {"batch_capacity": LaunchConfiguration("batch_capacity", default="6")} diff --git a/whisper_util/include/whisper_util/model_manager.hpp b/whisper_util/include/whisper_util/model_manager.hpp index adfb420..33b6c8e 100644 --- a/whisper_util/include/whisper_util/model_manager.hpp +++ b/whisper_util/include/whisper_util/model_manager.hpp @@ -15,9 +15,9 @@ class ModelManager { const std::string &cache_path = std::string(std::getenv("HOME")) + "/.cache/whisper.cpp"); void mkdir(const std::string &path); - bool is_available(const std::string &model_name = "tiny.en"); - int make_available(const std::string &model_name = "tiny.en"); - std::string get_model_path(const std::string 
&model_name = "tiny.en"); + bool is_available(const std::string &model_name = "base.en"); + int make_available(const std::string &model_name = "base.en"); + std::string get_model_path(const std::string &model_name = "base.en"); protected: std::string model_name_to_file_name_(const std::string &model_name); diff --git a/whisper_util/include/whisper_util/whisper.hpp b/whisper_util/include/whisper_util/whisper.hpp index 3d353c4..4a176f8 100644 --- a/whisper_util/include/whisper_util/whisper.hpp +++ b/whisper_util/include/whisper_util/whisper.hpp @@ -19,7 +19,8 @@ class Whisper { std::vector tokens(); whisper_context *ctx; - whisper_full_params params; + whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); // inference parameters; defaulted here so every constructor starts from library defaults + whisper_context_params cparams = whisper_context_default_params(); // context parameters (e.g. use_gpu); defaulted so initialize() never reads an unset value }; } // end of namespace whisper #endif // WHISPER_UTIL__WHISPER_HPP_ diff --git a/whisper_util/package.xml b/whisper_util/package.xml index e2af3e3..aacdf07 100644 --- a/whisper_util/package.xml +++ b/whisper_util/package.xml @@ -2,7 +2,7 @@ whisper_util - 1.1.0 + 1.2.0 ROS 2 wrapper for whisper.cpp. mhubii MIT diff --git a/whisper_util/src/whisper.cpp b/whisper_util/src/whisper.cpp index 0b6f451..23428f8 100644 --- a/whisper_util/src/whisper.cpp +++ b/whisper_util/src/whisper.cpp @@ -1,18 +1,18 @@ #include "whisper_util/whisper.hpp" namespace whisper { -Whisper::Whisper() { params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); } +Whisper::Whisper() { wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); } Whisper::Whisper(const std::string &model_path) { initialize(model_path); } Whisper::~Whisper() { whisper_free(ctx); } void Whisper::initialize(const std::string &model_path) { - ctx = whisper_init_from_file(model_path.c_str()); + ctx = whisper_init_from_file_with_params(model_path.c_str(), cparams); } std::string Whisper::forward(const std::vector &input) { - if (whisper_full(ctx, params, input.data(), input.size()) != 0) { + if (whisper_full(ctx, wparams, input.data(), input.size()) != 0) { return {}; } std::vector segments;