Further refinement of TensorRT-LLM backend based on WhisperS2T

Wordcab committed on Mar 29, 2024
commit 033b008 (1 parent: 7778357)
Show file tree

Hide file tree

Showing 6 changed files with 10 additions and 11 deletions.
diff --git a/notebooks/async_inference.py b/notebooks/async_inference.py
@@ -1,4 +1,5 @@
 import json
+
 import aiohttp
 
 headers = {"accept": "application/json", "Content-Type": "application/json"}

diff --git a/notebooks/audio_url_inference.py b/notebooks/audio_url_inference.py
@@ -1,4 +1,5 @@
 import json
+
 import requests
 
 headers = {"accept": "application/json", "Content-Type": "application/json"}

diff --git a/notebooks/live_inference.py b/notebooks/live_inference.py
@@ -1,8 +1,10 @@
 """Test the live endpoint."""
 
 import asyncio
+
 import websockets
 
+
 async def test_websocket_endpoint():
     uri = "ws://localhost:5001/api/v1/live?source_lang=en"  # Replace with the actual WebSocket URL
     async with websockets.connect(uri) as websocket:
@@ -16,5 +18,6 @@ async def test_websocket_endpoint():
         except websockets.exceptions.ConnectionClosed:
             print("WebSocket connection closed")
 
+
 if __name__ == "__main__":
     asyncio.get_event_loop().run_until_complete(test_websocket_endpoint())
diff --git a/notebooks/local_audio_inference.py b/notebooks/local_audio_inference.py
@@ -1,6 +1,6 @@
 import json
-import requests
 
+import requests
 
 # filepath = "data/short_one_speaker.mp3"
 # filepath = "data/24118946.mp3"

diff --git a/notebooks/transcribe_endpoint_only.py b/notebooks/transcribe_endpoint_only.py
@@ -37,14 +37,10 @@ def read_audio(
         wav, sr = torchaudio.load(audio)
     elif isinstance(audio, bytes):
         with io.BytesIO(audio) as buffer:
-            wav, sr = sf.read(
-                buffer, format="RAW", channels=1, samplerate=16000, subtype="PCM_16"
-            )
+            wav, sr = sf.read(buffer, format="RAW", channels=1, samplerate=16000, subtype="PCM_16")
         wav = torch.from_numpy(wav).unsqueeze(0)
     else:
-        raise ValueError(
-            f"Invalid audio type. Must be either str or bytes, got: {type(audio)}."
-        )
+        raise ValueError(f"Invalid audio type. Must be either str or bytes, got: {type(audio)}.")
 
     if wav.size(0) > 1:
         wav = wav.mean(dim=0, keepdim=True)
@@ -88,7 +84,6 @@ class TranscribeRequest(BaseModel):
 
 
 async def main():
-
     audio, _ = read_audio("data/HL_Podcast_1.mp3")
     ts = TensorShare.from_dict({"audio": audio}, backend=Backend.TORCH)
 
@@ -111,9 +106,7 @@ async def main():
             headers={"Content-Type": "application/json"},
         ) as response:
             if response.status != 200:
-                raise Exception(
-                    f"Remote transcription failed with status {response.status}."
-                )
+                raise Exception(f"Remote transcription failed with status {response.status}.")
             else:
                 r = await response.json()
 

diff --git a/notebooks/youtube_inference.py b/notebooks/youtube_inference.py
@@ -1,4 +1,5 @@
 import json
+
 import requests
 
 headers = {"accept": "application/json", "Content-Type": "application/json"}