Skip to content

Commit

Permalink
feat(parser, audio): fixed formatting for remote faster whisper server
Browse files Browse the repository at this point in the history
  • Loading branch information
lfenzo committed Nov 10, 2024
1 parent 4ae8542 commit d9ca838
Showing 1 changed file with 15 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,9 @@ class RemoteFasterWhisperParser(BaseBlobParser):
Example: Load a local audio file and remotely transcribe it into a document.
.. code-block:: python
from langchain.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers.audio import RemoteFasterWhisperParser
from langchain_community.document_loaders.parsers.audio import (
RemoteFasterWhisperParser
)
server_url = 'http://localhost:8000' # add your server URL here
Expand Down Expand Up @@ -747,7 +749,8 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
blob (Blob): The audio blob to be transcribed.
Yields:
Document: A Document object containing the transcribed text and associated metadata.
Document: A Document object containing the transcribed text and
associated metadata.
"""
audio_bytes = self._load_audio_from_blob(blob=blob)
transcription = self._transcribe_audio(file_bytes=audio_bytes)
Expand All @@ -772,9 +775,15 @@ def _transcribe_audio(self, file_bytes: bytes) -> dict[str, str]:
Raises:
RuntimeError: If the transcription process fails.
"""
url = self._get_transcription_url()
process = subprocess.Popen(
['curl', url, '-F', 'file=@-;type=audio/mp3', '-F', f'model={self.model_size}'],
[
'curl',
self.transcription_url,
'-F',
'file=@-;type=audio/mp3',
'-F',
f'model={self.model_size}'
],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
Expand All @@ -787,7 +796,8 @@ def _transcribe_audio(self, file_bytes: bytes) -> dict[str, str]:

return json.loads(stdout.decode())

def _get_transcription_url(self) -> str:
@property
def transcription_url(self) -> str:
return f"{self.base_url}/{self.TRANSCRIPTION_ENDPOINT}"

def _load_audio_from_blob(self, blob: Blob) -> bytes:
Expand Down

0 comments on commit d9ca838

Please sign in to comment.