Skip to content

Commit

Permalink
Merge pull request #132 from mobiusml/hr/download
Browse files Browse the repository at this point in the history
Hr/download metadata
  • Loading branch information
HRashidi authored Jul 11, 2024
2 parents 4021654 + 1ba4970 commit 1b66d32
Show file tree
Hide file tree
Showing 31 changed files with 494 additions and 1,593 deletions.
3 changes: 3 additions & 0 deletions aana/core/models/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ class VideoMetadata(BaseModel):

title: str = Field(None, description="The title of the video.")
description: str = Field(None, description="The description of the video.")
duration: float | None = Field(
None, description="The duration of the video in seconds."
)
model_config = ConfigDict(
json_schema_extra={
"description": "Metadata of a video.",
Expand Down
7 changes: 4 additions & 3 deletions aana/deployments/vllm_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class VLLMConfig(BaseModel):
chat_template (str): the name of the chat template, if not provided, the chat template from the model will be used
but some models may not have a chat template (optional, default: None)
enforce_eager: whether to enforce eager execution (optional, default: False)
engine_args: extra engine arguments (optional, default: {})
engine_args: extra engine arguments (optional, default: {})
"""

Expand All @@ -57,6 +57,7 @@ class VLLMConfig(BaseModel):
enforce_eager: bool | None = Field(default=False)
engine_args: CustomConfig = {}


@serve.deployment
class VLLMDeployment(BaseTextGenerationDeployment):
"""Deployment to serve large language models using vLLM."""
Expand Down Expand Up @@ -101,7 +102,7 @@ async def apply_config(self, config: dict[str, Any]):
enforce_eager=config_obj.enforce_eager,
gpu_memory_utilization=self.gpu_memory_utilization,
max_model_len=config_obj.max_model_len,
**config_obj.engine_args
**config_obj.engine_args,
)

# TODO: check if the model is already loaded.
Expand Down Expand Up @@ -153,7 +154,7 @@ async def generate_stream(
results_generator = self.engine.generate(
sampling_params=sampling_params_vllm,
request_id=request_id,
inputs=TokensPrompt(prompt_token_ids=prompt_token_ids)
inputs=TokensPrompt(prompt_token_ids=prompt_token_ids),
)

num_returned = 0
Expand Down
40 changes: 39 additions & 1 deletion aana/integrations/external/yt_dlp.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,51 @@
import hashlib
from pathlib import Path
from typing_extensions import TypedDict

import yt_dlp
from yt_dlp.utils import DownloadError

from aana.configs.settings import settings
from aana.core.models.video import Video, VideoInput
from aana.core.models.video import Video, VideoInput, VideoMetadata
from aana.exceptions.io import (
DownloadException,
)


def get_video_metadata(video_url: str) -> VideoMetadata:
    """Fetch video's metadata for a url without downloading the video.

    Args:
        video_url (str): the video input url

    Returns:
        metadata (VideoMetadata): the metadata of the video; title and
            description fall back to an empty string when the extractor
            reports none, duration stays None when it is unknown

    Raises:
        DownloadException: Request does not succeed.
    """
    ydl_options = {
        "extract_flat": True,
        "hls_prefer_native": True,
        # Only metadata is needed here, so ask the YouTube extractor to skip
        # the HLS/DASH manifests.
        "extractor_args": {"youtube": {"skip": ["hls", "dash"]}},
    }

    # Keep the try body limited to the call that can raise DownloadError.
    try:
        with yt_dlp.YoutubeDL(ydl_options) as ydl:
            info = ydl.extract_info(video_url, download=False)
    except DownloadError as e:
        # Only the first ";"-separated part of the yt-dlp message is forwarded.
        error_message = e.msg.split(";")[0]
        raise DownloadException(url=video_url, msg=error_message) from e

    # The info dict may contain a key whose value is None; `.get(key, "")`
    # would pass that None through to the str-typed fields, so normalise
    # with `or ""` instead.
    return VideoMetadata(
        title=info.get("title") or "",
        description=info.get("description") or "",
        duration=info.get("duration"),
    )


def download_video(video_input: VideoInput | Video) -> Video:
"""Downloads videos for a VideoInput object.
Expand All @@ -19,6 +54,9 @@ def download_video(video_input: VideoInput | Video) -> Video:
Returns:
Video: the video object
Raises:
DownloadException: Request does not succeed.
"""
if isinstance(video_input, Video):
return video_input
Expand Down
22 changes: 19 additions & 3 deletions aana/projects/chat_with_video/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from aana.exceptions.db import MediaIdAlreadyExistsException, UnfinishedVideoException
from aana.exceptions.io import VideoTooLongException
from aana.integrations.external.decord import generate_frames, get_video_duration
from aana.integrations.external.yt_dlp import download_video
from aana.integrations.external.yt_dlp import download_video, get_video_metadata
from aana.processors.remote import run_remote
from aana.processors.video import extract_audio, generate_combined_timeline
from aana.projects.chat_with_video.const import (
Expand Down Expand Up @@ -154,8 +154,22 @@ async def run(
if check_media_id_exist(media_id):
raise MediaIdAlreadyExistsException(table_name="media", media_id=video)

video_duration = None
if video.url is not None:
video_metadata = get_video_metadata(video.url)
video_duration = video_metadata.duration

# precheck for max video length before actually download the video if possible
if video_duration and video_duration > max_video_len:
raise VideoTooLongException(
video=video,
video_len=video_duration,
max_len=max_video_len,
)

video_obj: Video = await run_remote(download_video)(video_input=video)
video_duration = await run_remote(get_video_duration)(video=video_obj)
if video_duration is None:
video_duration = await run_remote(get_video_duration)(video=video_obj)

if video_duration > max_video_len:
raise VideoTooLongException(
Expand All @@ -168,7 +182,9 @@ async def run(
yield {
"media_id": media_id,
"metadata": VideoMetadata(
title=video_obj.title, description=video_obj.description
title=video_obj.title,
description=video_obj.description,
duration=video_duration,
),
}

Expand Down
1 change: 1 addition & 0 deletions aana/storage/services/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,4 +392,5 @@ def load_video_metadata(
return VideoMetadata(
title=video_entity.title,
description=video_entity.description,
duration=video_entity.duration,
)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 1b66d32

Please sign in to comment.