From 6eb45d551e8964fba5e2907ae15d8986927d33b6 Mon Sep 17 00:00:00 2001
From: Nathan Corral <nathanbcorral@gmail.com>
Date: Wed, 11 Dec 2024 10:51:07 +0100
Subject: [PATCH 1/7] Update whisper cpp to version 1.7.2

---
 whisper_cpp_vendor/CMakeLists.txt | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/whisper_cpp_vendor/CMakeLists.txt b/whisper_cpp_vendor/CMakeLists.txt
index e73bc0d..8253525 100644
--- a/whisper_cpp_vendor/CMakeLists.txt
+++ b/whisper_cpp_vendor/CMakeLists.txt
@@ -10,7 +10,7 @@ include(FetchContent)
 find_package(ament_cmake REQUIRED)
 
 set(WHISPER_VERSION_MAJOR 1 CACHE STRING "Major whisper.cpp version.")
-set(WHISPER_VERSION_MINOR 6 CACHE STRING "Minor whisper.cpp version.")
+set(WHISPER_VERSION_MINOR 7 CACHE STRING "Minor whisper.cpp version.")
 set(WHISPER_VERSION_PATCH 2 CACHE STRING "Patch whisper.cpp version.")
 
 FetchContent_Declare(
@@ -34,20 +34,38 @@ set_target_properties(
 
 # install ggml header
 install(
-  FILES ${whisper_SOURCE_DIR}/ggml.h
+  FILES ${whisper_SOURCE_DIR}/ggml/include/ggml.h
   DESTINATION include
 )
 ##############
 # end of fixes
 ##############
+# Export header files for downstream packages
+install(
+  DIRECTORY
+    ${whisper_SOURCE_DIR}/include/
+    ${whisper_SOURCE_DIR}/ggml/include/
+  DESTINATION include
+)
+
+install(
+  TARGETS ggml
+  EXPORT export_whisper
+  LIBRARY DESTINATION lib
+  INCLUDES DESTINATION include
+)
 
 ament_export_targets(export_whisper HAS_LIBRARY_TARGET)
 
+# Install the library
 install(
   TARGETS whisper
   EXPORT export_whisper
   LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib
+  RUNTIME DESTINATION bin
   INCLUDES DESTINATION include
+  PUBLIC_HEADER DESTINATION include
 )
 
 ament_package()

From 1fae935d0eceb5c4d82c7243efa5957ad8d7fb7b Mon Sep 17 00:00:00 2001
From: Nathan Corral <nathanbcorral@gmail.com>
Date: Wed, 11 Dec 2024 10:58:43 +0100
Subject: [PATCH 2/7] Update README CUDA build flag for whisper.cpp 1.7.2

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 63033ef..70aa0ee 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ This example shows live transcription of first minute of the 6'th chapter in ***
 ```shell
 mkdir -p ros-ai/src && cd ros-ai/src && \
 git clone https://github.com/ros-ai/ros2_whisper.git && cd .. && \
-colcon build --symlink-install --cmake-args -DWHISPER_CUDA=On --no-warn-unused-cli
+colcon build --symlink-install --cmake-args -DGGML_CUDA=On --no-warn-unused-cli
 ```
 
 ## Demos

From cdbcb0e8b3c3d15f94c6ccf1b9cb9276cef4db59 Mon Sep 17 00:00:00 2001
From: Nathan Corral <nathanbcorral@gmail.com>
Date: Wed, 11 Dec 2024 11:22:55 +0100
Subject: [PATCH 3/7] Added necessary whisper.cpp mutex

---
 .../include/whisper_server/inference.hpp      | 2 +-
 whisper_server/src/inference.cpp              | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/whisper_server/include/whisper_server/inference.hpp b/whisper_server/include/whisper_server/inference.hpp
index d1510ff..ecb1916 100644
--- a/whisper_server/include/whisper_server/inference.hpp
+++ b/whisper_server/include/whisper_server/inference.hpp
@@ -6,7 +6,7 @@
 #include <numeric>
 #include <stdexcept>
 #include <string>
-// #include <mutex>
+#include <mutex>
 
 #include "rcl_interfaces/msg/set_parameters_result.hpp"
 #include "rclcpp/rclcpp.hpp"
diff --git a/whisper_server/src/inference.cpp b/whisper_server/src/inference.cpp
index 109244b..1edbbef 100644
--- a/whisper_server/src/inference.cpp
+++ b/whisper_server/src/inference.cpp
@@ -148,6 +148,15 @@ whisper_idl::msg::WhisperTokens Inference::create_message_() {
 }
 
 bool Inference::run_inference_(whisper_idl::msg::WhisperTokens &result) {
+  // Guard whisper.cpp with a scoped lock so the mutex is released on every
+  // exit path of this function, including exceptional ones. If the mutex is
+  // already held, skip this cycle and report false instead of blocking.
+  std::unique_lock lock(whisper_mutex_, std::try_to_lock);
+  if ( !lock.owns_lock() ) {
+    RCLCPP_INFO(get_logger(), "Whisper.cpp busy, skipping inference");
+    return false;
+  }
+
   const auto& [data, timestamp] = audio_ring_->peak();
   result.stamp = chrono_to_ros_msg(timestamp);
 

From 3983d1bfb1e198475308a9aacdbc465578ff6443 Mon Sep 17 00:00:00 2001
From: Nathan Corral <nathanbcorral@gmail.com>
Date: Wed, 11 Dec 2024 11:34:06 +0100
Subject: [PATCH 4/7] Remove token_id_ member and hand-written copy/move members

---
 .../include/transcript_manager/tokens.hpp     | 29 -------------------
 1 file changed, 29 deletions(-)

diff --git a/transcript_manager/include/transcript_manager/tokens.hpp b/transcript_manager/include/transcript_manager/tokens.hpp
index eb21ef2..a6576a8 100644
--- a/transcript_manager/include/transcript_manager/tokens.hpp
+++ b/transcript_manager/include/transcript_manager/tokens.hpp
@@ -13,7 +13,6 @@ class SingleToken {
 private:
   std::string data_;
   float prob_;
-  int token_id_;
 
 public:
   std::string get_data() const {
@@ -26,34 +25,6 @@ class SingleToken {
 
   SingleToken(const std::string& data_, float prob_)
         : data_(data_), prob_(prob_) {};
-
-  // Copy constructor
-  SingleToken(const SingleToken& other)
-      : data_(other.data_), prob_(other.prob_), token_id_(other.token_id_) {};
-
-  // Move constructor
-  SingleToken(SingleToken&& other) noexcept
-      : data_(std::move(other.data_)), prob_(other.prob_), token_id_(other.token_id_) {};
-
-  // Copy assignment operator
-  SingleToken& operator=(const SingleToken& other) {
-    if ( this != &other ) {
-      data_ = other.data_;
-      prob_ = other.prob_;
-      token_id_ = other.token_id_;
-    }
-    return *this;
-  }
-
-  // Move assignment operator
-  SingleToken& operator=(SingleToken&& other) noexcept {
-    if ( this != &other ) {
-      data_ = std::move(other.data_);
-      prob_ = other.prob_;
-      token_id_ = other.token_id_;
-    }
-    return *this;
-  }
 };
 
 } // end of namespace whisper

From d7c1ac49dc7befc6c64aef0de2499f83a2237502 Mon Sep 17 00:00:00 2001
From: Nathan Corral <nathanbcorral@gmail.com>
Date: Wed, 11 Dec 2024 15:13:04 +0100
Subject: [PATCH 5/7] Update to version 1.4.0

---
 CHANGELOG.rst                           | 35 +++++++++++++++++++++++++
 audio_listener/package.xml              |  2 +-
 audio_listener/setup.py                 |  2 +-
 transcript_manager/package.xml          |  6 ++---
 whisper_bringup/launch/replay.launch.py |  9 -------
 whisper_bringup/package.xml             |  2 +-
 whisper_cpp_vendor/package.xml          |  2 +-
 whisper_demos/package.xml               |  2 +-
 whisper_demos/setup.py                  |  2 +-
 whisper_idl/package.xml                 |  2 +-
 whisper_server/package.xml              |  2 +-
 whisper_util/package.xml                |  2 +-
 12 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c7a3c45..b24ec4f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,8 +1,43 @@
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Changelog for package ROS 2 Whisper
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+## 1.4.0 (2024-12-11)
+
+- `whisper_cpp_vendor`: `whisper.cpp` 1.6.2 to 1.7.2 release, build changes
+
+- Added live audio transcription streaming
+
+- `whisper_server`:  changes:
+  - Holding incoming Audio data in a Ring Buffer (removed BatchBuffer, drop oldest audio).
+  - Transcribing the entire buffer of audio data with whisper.cpp on a timer interrupt
+  - Publishing the resulting tokens + probabilities on topic  `/whisper/tokens` 
+  - Removing the Action Server
+  - New Node Parameters:
+    - `active` -- Boolean to control if whisper.cpp should be run or not.
+    - `callback_ms` -- Integer controlling how often whisper.cpp is called. 
+    - `buffer_capacity` -- Integer number of seconds previous where audio is transcribed.
+  
+- `transcript_manager`:  Package added to:
+
+  - Store record of what was previously transcribed.
+  - Track what is currently being transcribed.  Align and update the text from subscribed topic `/whisper/tokens`.
+    - Updates done on timer interrupt
+  - Host the Action Server which was previously part of `whisper_server`
+  - Publish the entire transcript (previous and current) under `/whisper/transcript_stream` 
+    - Published transcript contains text and estimated segment markings, segment timestamps
+
+- `whisper_demos`:   Add `stream` node
+
+- `whisper_idl`:  Added `msg/WhisperTokens.msg`,  `msg/AudioTranscript.msg` 
+
+- `whisper_idl`:  Added  `launch/replay.launch.py` which does not bring up `audio_listener`
+
+- `whisper_util`:  Changes to directly inference and then serialize whisper.cpp model output, also containing probability data.
+
 1.3.1 (2024-07-01)
 ------------------
+
 * `whisper_msgs`: Changed to `whisper_idl` package
 * `whisper_bringup`: Changed executor to `MultiThreadedExecutor` so audio and inference can run in parallel on `whisper_server`
 
diff --git a/audio_listener/package.xml b/audio_listener/package.xml
index d830bf3..b31b2bb 100644
--- a/audio_listener/package.xml
+++ b/audio_listener/package.xml
@@ -2,7 +2,7 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>audio_listener</name>
-  <version>1.3.1</version>
+  <version>1.4.0</version>
   <description>Audio common replica.</description>
   <maintainer email="m.huber_1994@hotmail.de">mhubii</maintainer>
   <license>MIT</license>
diff --git a/audio_listener/setup.py b/audio_listener/setup.py
index 37cc54d..7a8ee6c 100644
--- a/audio_listener/setup.py
+++ b/audio_listener/setup.py
@@ -4,7 +4,7 @@
 
 setup(
     name=package_name,
-    version="1.3.1",
+    version="1.4.0",
     packages=find_packages(exclude=["test"]),
     data_files=[
         ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
diff --git a/transcript_manager/package.xml b/transcript_manager/package.xml
index fb7bd29..9b99cf4 100644
--- a/transcript_manager/package.xml
+++ b/transcript_manager/package.xml
@@ -2,10 +2,10 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>transcript_manager</name>
-  <version>0.0.1</version>
+  <version>1.4.0</version>
   <description>Fuse overlapping whisper inference results into a single transcirpt.</description>
-  <maintainer email="nathanbcorral@gmail.com">nathan</maintainer>
-  <license>Apache-2.0</license>
+  <maintainer email="nathan.b.corral@gmail.com">nathan</maintainer>
+  <license>MIT</license>
 
   <buildtool_depend>ament_cmake</buildtool_depend>
 
diff --git a/whisper_bringup/launch/replay.launch.py b/whisper_bringup/launch/replay.launch.py
index 4efd7b4..30598c1 100644
--- a/whisper_bringup/launch/replay.launch.py
+++ b/whisper_bringup/launch/replay.launch.py
@@ -20,15 +20,6 @@ def generate_launch_description() -> LaunchDescription:
 
     ld = LaunchDescription()
 
-    # launch audio listener
-    # ld.add_action(
-    #     Node(
-    #         package="audio_listener",
-    #         executable="audio_listener",
-    #         output="screen",
-    #     )
-    # )
-
     # launch whisper
     whisper_config = os.path.join(
         get_package_share_directory("whisper_server"), "config", "whisper.yaml"
diff --git a/whisper_bringup/package.xml b/whisper_bringup/package.xml
index 2d9d12f..3a71952 100644
--- a/whisper_bringup/package.xml
+++ b/whisper_bringup/package.xml
@@ -2,7 +2,7 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>whisper_bringup</name>
-  <version>1.3.1</version>
+  <version>1.4.0</version>
   <description>TODO: Package description</description>
   <maintainer email="m.huber_1994@hotmail.de">mhubii</maintainer>
   <license>MIT</license>
diff --git a/whisper_cpp_vendor/package.xml b/whisper_cpp_vendor/package.xml
index b5d90b2..79c6077 100644
--- a/whisper_cpp_vendor/package.xml
+++ b/whisper_cpp_vendor/package.xml
@@ -2,7 +2,7 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>whisper_cpp_vendor</name>
-  <version>1.3.1</version>
+  <version>1.4.0</version>
   <description>Vendor package for whisper.cpp.</description>
   <maintainer email="m.huber_1994@hotmail.de">mhubii</maintainer>
   <license>MIT</license>
diff --git a/whisper_demos/package.xml b/whisper_demos/package.xml
index aa8b19e..e1316ec 100644
--- a/whisper_demos/package.xml
+++ b/whisper_demos/package.xml
@@ -2,7 +2,7 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>whisper_demos</name>
-  <version>1.3.1</version>
+  <version>1.4.0</version>
   <description>Demos for using the ROS 2 whisper package.</description>
   <maintainer email="m.huber_1994@hotmail.de">mhubii</maintainer>
   <license>MIT</license>
diff --git a/whisper_demos/setup.py b/whisper_demos/setup.py
index 06f9059..78fe9b3 100644
--- a/whisper_demos/setup.py
+++ b/whisper_demos/setup.py
@@ -4,7 +4,7 @@
 
 setup(
     name=package_name,
-    version="1.3.1",
+    version="1.4.0",
     packages=find_packages(exclude=["test"]),
     data_files=[
         ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
diff --git a/whisper_idl/package.xml b/whisper_idl/package.xml
index 79a0331..730ceda 100644
--- a/whisper_idl/package.xml
+++ b/whisper_idl/package.xml
@@ -2,7 +2,7 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>whisper_idl</name>
-  <version>1.3.1</version>
+  <version>1.4.0</version>
   <description>Messages for the ROS 2 whisper package</description>
   <maintainer email="m.huber_1994@hotmail.de">mhubii</maintainer>
   <license>MIT</license>
diff --git a/whisper_server/package.xml b/whisper_server/package.xml
index 27fdefa..62b770c 100644
--- a/whisper_server/package.xml
+++ b/whisper_server/package.xml
@@ -2,7 +2,7 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>whisper_server</name>
-  <version>1.3.1</version>
+  <version>1.4.0</version>
   <description>ROS 2 whisper.cpp inference server.</description>
   <maintainer email="m.huber_1994@hotmail.de">mhubii</maintainer>
   <license>MIT</license>
diff --git a/whisper_util/package.xml b/whisper_util/package.xml
index bb5d697..b781658 100644
--- a/whisper_util/package.xml
+++ b/whisper_util/package.xml
@@ -2,7 +2,7 @@
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>whisper_util</name>
-  <version>1.3.1</version>
+  <version>1.4.0</version>
   <description>ROS 2 wrapper for whisper.cpp.</description>
   <maintainer email="m.huber_1994@hotmail.de">mhubii</maintainer>
   <license>MIT</license>

From c64e31a33e376baf13aa1fe3843ca033702c16cf Mon Sep 17 00:00:00 2001
From: Nathan Corral <nathanbcorral@gmail.com>
Date: Wed, 11 Dec 2024 16:01:01 +0100
Subject: [PATCH 6/7] Convert changelog from Markdown to reStructuredText

---
 CHANGELOG.rst | 55 +++++++++++++++++++++++----------------------------
 1 file changed, 25 insertions(+), 30 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b24ec4f..cf471ed 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,38 +2,33 @@
 Changelog for package ROS 2 Whisper
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-## 1.4.0 (2024-12-11)
-
-- `whisper_cpp_vendor`: `whisper.cpp` 1.6.2 to 1.7.2 release, build changes
-
-- Added live audio transcription streaming
-
-- `whisper_server`:  changes:
-  - Holding incoming Audio data in a Ring Buffer (removed BatchBuffer, drop oldest audio).
-  - Transcribing the entire buffer of audio data with whisper.cpp on a timer interrupt
-  - Publishing the resulting tokens + probabilities on topic  `/whisper/tokens` 
-  - Removing the Action Server
-  - New Node Parameters:
-    - `active` -- Boolean to control if whisper.cpp should be run or not.
-    - `callback_ms` -- Integer controlling how often whisper.cpp is called. 
-    - `buffer_capacity` -- Integer number of seconds previous where audio is transcribed.
-  
-- `transcript_manager`:  Package added to:
-
-  - Store record of what was previously transcribed.
-  - Track what is currently being transcribed.  Align and update the text from subscribed topic `/whisper/tokens`.
-    - Updates done on timer interrupt
-  - Host the Action Server which was previously part of `whisper_server`
-  - Publish the entire transcript (previous and current) under `/whisper/transcript_stream` 
-    - Published transcript contains text and estimated segment markings, segment timestamps
-
-- `whisper_demos`:   Add `stream` node
-
-- `whisper_idl`:  Added `msg/WhisperTokens.msg`,  `msg/AudioTranscript.msg` 
+1.4.0 (2024-12-11)
+------------------
 
-- `whisper_idl`:  Added  `launch/replay.launch.py` which does not bring up `audio_listener`
+* `whisper_cpp_vendor`: `whisper.cpp` 1.6.2 to 1.7.2 release, build changes
+* Added live audio transcription streaming
+* `whisper_server` changes:
+   * Holding incoming Audio data in a Ring Buffer (removed BatchBuffer, drop oldest audio).
+   * Transcribing the entire buffer of audio data with whisper.cpp on a timer interrupt
+   * Publishing the resulting tokens + probabilities on topic `/whisper/tokens`
+   * Removing the Action Server
+   * New Node Parameters:
+       * `active` -- Boolean to control if whisper.cpp should be run or not.
+       * `callback_ms` -- Integer controlling how often whisper.cpp is called.
+       * `buffer_capacity` -- Integer number of seconds previous where audio is transcribed.
+* `transcript_manager` package added to:
+   * Store record of what was previously transcribed.
+   * Track what is currently being transcribed. Align and update the text from subscribed topic `/whisper/tokens`.
+       * Updates done on timer interrupt
+   * Host the Action Server which was previously part of `whisper_server`
+   * Publish the entire transcript (previous and current) under `/whisper/transcript_stream`
+       * Published transcript contains text and estimated segment markings, segment timestamps
+* `whisper_demos`: Add `stream` node
+* `whisper_idl`:
+   * Added `msg/WhisperTokens.msg`, `msg/AudioTranscript.msg`
+* `whisper_bringup`: Added `launch/replay.launch.py` which does not bring up `audio_listener`
+* `whisper_util`: Changes to directly inference and then serialize whisper.cpp model output, also containing probability data.
 
-- `whisper_util`:  Changes to directly inference and then serialize whisper.cpp model output, also containing probability data.
 
 1.3.1 (2024-07-01)
 ------------------

From 4d09999ac934b4e43c83ed0824bdd7c13424e8ab Mon Sep 17 00:00:00 2001
From: Nathan Corral <nathanbcorral@gmail.com>
Date: Thu, 12 Dec 2024 15:16:50 +0100
Subject: [PATCH 7/7] Remove unnecessary ARCHIVE and RUNTIME install
 destinations for the whisper target

---
 whisper_cpp_vendor/CMakeLists.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/whisper_cpp_vendor/CMakeLists.txt b/whisper_cpp_vendor/CMakeLists.txt
index 8253525..110cb37 100644
--- a/whisper_cpp_vendor/CMakeLists.txt
+++ b/whisper_cpp_vendor/CMakeLists.txt
@@ -62,8 +62,6 @@ install(
   TARGETS whisper
   EXPORT export_whisper
   LIBRARY DESTINATION lib
-  ARCHIVE DESTINATION lib
-  RUNTIME DESTINATION bin
   INCLUDES DESTINATION include
   PUBLIC_HEADER DESTINATION include
 )