Skip to content

Commit

Permalink
tidy file for linting
Browse files Browse the repository at this point in the history
  • Loading branch information
Sheepsta300 committed Aug 28, 2024
1 parent f47fc9b commit edd9707
Showing 1 changed file with 9 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import io
import logging
import os
import io
import time
from typing import Any, Dict, Iterator, Literal, Optional, Tuple, Union

Expand Down Expand Up @@ -44,17 +44,18 @@ def __init__(
api_key (Optional[str]): Azure OpenAI API key.
deployment_model (str): Identifier for the specific model deployment.
chunk_duration_threshold (float): Minimum duration of a chunk in seconds
NOTE: According to the OpenAI API, the chunk duration should be at least 0.1
seconds. If the chunk duration is less than or equal to the threshold,
it will be skipped.
NOTE: According to the OpenAI API, the chunk duration should be at
least 0.1 seconds. If the chunk duration is less than or equal
to the threshold, it will be skipped.
azure_endpoint (Optional[str]): URL endpoint for the Azure OpenAI service.
api_version (Optional[str]): Version of the OpenAI API to use.
language (Optional[str]): Language for processing the request.
prompt (Optional[str]): Query or instructions for the AI model.
response_format
(Union[Literal["json", "text", "srt", "verbose_json", "vtt"], None]):
Format for the response from the service.
temperature (Optional[float]): Controls the randomness of the AI model’s output.
temperature (Optional[float]): Controls the randomness
of the AI model’s output.
"""
self.api_key = api_key or os.environ.get("AZURE_OPENAI_API_KEY")
self.azure_endpoint = azure_endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT")
Expand Down Expand Up @@ -125,7 +126,7 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
# Need to meet 25MB size limit for Whisper API
chunk_duration = 20
chunk_duration_ms = chunk_duration * 60 * 1000
print(blob.source)

# Split the audio into chunk_duration_ms chunks
for split_number, i in enumerate(range(0, len(audio), chunk_duration_ms)):
# Audio chunk
Expand All @@ -135,7 +136,8 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
continue
file_obj = io.BytesIO(chunk.export(format=file_extension).read())
if blob.source is not None:
file_obj.name = os.path.splitext(blob.source)[0] + f"_part_{split_number}.{file_extension}"
file_obj.name = (os.path.splitext(blob.source)[0]
+ f"_part_{split_number}.{file_extension}")
else:
file_obj.name = f"part_{split_number}.{file_extension}"

Expand Down

0 comments on commit edd9707

Please sign in to comment.