-
Notifications
You must be signed in to change notification settings - Fork 291
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* cleanup * cleanup prompt mgmt * up * cleanup printout * cleanup new parser logic, set vlm as default for all providers * allow user to re-override
- Loading branch information
1 parent
2f674dd
commit 080d8cb
Showing
55 changed files
with
737 additions
and
1,255 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,81 @@ | ||
import base64 | ||
import logging | ||
import os | ||
import tempfile | ||
from typing import AsyncGenerator | ||
|
||
from core.base.parsers.base_parser import AsyncParser | ||
from core.parsers.media.openai_helpers import process_audio_with_openai | ||
from core.base.providers import ( | ||
CompletionProvider, | ||
DatabaseProvider, | ||
IngestionConfig, | ||
) | ||
|
||
logger = logging.getLogger() | ||
|
||
|
||
class AudioParser(AsyncParser[bytes]): | ||
"""A parser for audio data.""" | ||
"""A parser for audio data using Whisper transcription.""" | ||
|
||
def __init__( | ||
self, api_base: str = "https://api.openai.com/v1/audio/transcriptions" | ||
self, | ||
config: IngestionConfig, | ||
database_provider: DatabaseProvider, | ||
llm_provider: CompletionProvider, | ||
): | ||
self.api_base = api_base | ||
self.openai_api_key = os.environ.get("OPENAI_API_KEY") | ||
self.database_provider = database_provider | ||
self.llm_provider = llm_provider | ||
self.config = config | ||
try: | ||
from litellm import atranscription | ||
|
||
self.atranscription = atranscription | ||
except ImportError: | ||
logger.error("Failed to import LiteLLM transcription") | ||
raise ImportError( | ||
"Please install the `litellm` package to use the AudioParser." | ||
) | ||
|
||
async def ingest( # type: ignore | ||
self, data: bytes, chunk_size: int = 1024, *args, **kwargs | ||
self, data: bytes, **kwargs | ||
) -> AsyncGenerator[str, None]: | ||
"""Ingest audio data and yield a transcription.""" | ||
temp_audio_path = "temp_audio.wav" | ||
with open(temp_audio_path, "wb") as f: | ||
f.write(data) | ||
""" | ||
Ingest audio data and yield a transcription using Whisper via LiteLLM. | ||
Args: | ||
data: Raw audio bytes | ||
**kwargs: Additional keyword arguments passed to the transcription call | ||
Note: the transcription model is not an argument; it is read from config.audio_transcription_model | ||
Yields: | ||
The complete transcribed text, yielded once as a single string | ||
""" | ||
try: | ||
transcription_text = process_audio_with_openai( | ||
open(temp_audio_path, "rb"), self.openai_api_key # type: ignore | ||
# Create a temporary file to store the audio data | ||
with tempfile.NamedTemporaryFile( | ||
suffix=".wav", delete=False | ||
) as temp_file: | ||
temp_file.write(data) | ||
temp_file_path = temp_file.name | ||
|
||
# Call Whisper transcription | ||
response = await self.atranscription( | ||
model=self.config.audio_transcription_model, | ||
file=open(temp_file_path, "rb"), | ||
**kwargs, | ||
) | ||
|
||
# split text into small chunks and yield them | ||
for i in range(0, len(transcription_text), chunk_size): | ||
text = transcription_text[i : i + chunk_size] | ||
if text and text != "": | ||
yield text | ||
# The response should contain the transcribed text directly | ||
yield response.text | ||
|
||
except Exception as e: | ||
logger.error(f"Error processing audio with Whisper: {str(e)}") | ||
raise | ||
|
||
finally: | ||
os.remove(temp_audio_path) | ||
# Clean up the temporary file | ||
try: | ||
os.unlink(temp_file_path) | ||
except Exception as e: | ||
logger.warning(f"Failed to delete temporary file: {str(e)}") |
Oops, something went wrong.