Skip to content

Commit

Permalink
Added youtube support
Browse files Browse the repository at this point in the history
  • Loading branch information
movchan74 committed Nov 13, 2023
1 parent 3337777 commit 5e52de5
Show file tree
Hide file tree
Showing 9 changed files with 369 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 ffmpeg
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ WORKDIR /app
# debconf's frontend is spelled "noninteractive" (no hyphen); any other value
# is not recognised and apt falls back to an interactive frontend with warnings.
ENV DEBIAN_FRONTEND=noninteractive

# Install required libraries, tools, and Python3
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl git python3.10 python3.10-dev python3-pip python3.10-venv
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 ffmpeg curl git python3.10 python3.10-dev python3-pip python3.10-venv

# Install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -
Expand Down
2 changes: 1 addition & 1 deletion aana/configs/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
name="blip2_video_generate",
path="/video/generate_captions",
summary="Generate captions for videos using BLIP2 OPT-2.7B",
outputs=["video_captions_hf_blip2_opt_2_7b"],
outputs=["video_captions_hf_blip2_opt_2_7b", "timestamps"],
),
],
"video": [
Expand Down
47 changes: 43 additions & 4 deletions aana/configs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from aana.models.pydantic.image_input import ImageInputList
from aana.models.pydantic.prompt import Prompt
from aana.models.pydantic.sampling_params import SamplingParams
from aana.models.pydantic.video_input import VideoInputList
from aana.models.pydantic.video_input import VideoOrYoutubeVideoInputList
from aana.models.pydantic.video_params import VideoParams

# container data model
Expand Down Expand Up @@ -190,6 +190,19 @@
}
],
},
# {
# "name": "videos",
# "type": "input",
# "inputs": [],
# "outputs": [
# {
# "name": "videos",
# "key": "videos",
# "path": "video_batch.videos.[*].video",
# "data_model": VideoInputList,
# }
# ],
# },
{
"name": "videos",
"type": "input",
Expand All @@ -198,11 +211,33 @@
{
"name": "videos",
"key": "videos",
"path": "video_batch.videos.[*].video",
"data_model": VideoInputList,
"path": "video_batch.videos.[*].video_input",
"data_model": VideoOrYoutubeVideoInputList,
}
],
},
{
"name": "youtube_download",
"type": "ray_task",
"function": "aana.utils.video.download_youtube_video",
"batched": True,
"flatten_by": "video_batch.videos.[*]",
"dict_output": False,
"inputs": [
{
"name": "videos",
"key": "video",
"path": "video_batch.videos.[*].video_input",
},
],
"outputs": [
{
"name": "video_objects",
"key": "output",
"path": "video_batch.videos.[*].video",
},
],
},
{
"name": "video_params",
"type": "input",
Expand All @@ -223,7 +258,11 @@
"batched": True,
"flatten_by": "video_batch.videos.[*]",
"inputs": [
{"name": "videos", "key": "video", "path": "video_batch.videos.[*].video"},
{
"name": "video_objects",
"key": "video",
"path": "video_batch.videos.[*].video",
},
{"name": "video_params", "key": "params", "path": "video_batch.params"},
],
"outputs": [
Expand Down
117 changes: 117 additions & 0 deletions aana/models/pydantic/video_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,120 @@ class Config:
}
file_upload = True
file_upload_description = "Upload video files."


class YoutubeVideoInput(BaseModel):
    """
    Input model describing a video hosted on youtube.

    Attributes:
        youtube_url (str): the URL of the youtube video
    """

    youtube_url: str = Field(..., description="The URL of the youtube video.")

    @validator("youtube_url")
    def check_youtube_url(cls, v: str) -> str:
        """
        Validation hook for the youtube URL.

        No check is performed yet: the value is handed back unchanged,
        so any string is currently accepted.

        Args:
            v (str): the youtube URL

        Returns:
            str: the youtube URL, unmodified
        """
        # TODO: implement the youtube URL validation
        return v

    class Config:
        # Re-run validators when attributes are assigned after construction.
        validate_assignment = True
        schema_extra = {
            "description": (
                "A youtube video. \n"
                "The video will be downloaded from youtube using the provided URL."
            ),
        }


class VideoOrYoutubeVideoInputList(BaseListModel):
    """
    A pydantic model for a list of video inputs.

    Each item is either a VideoInput or a YoutubeVideoInput.
    """

    __root__: List[VideoInput | YoutubeVideoInput] = Field(
        ...,
        description="The list of video inputs.",
    )

    @validator("__root__", pre=True)
    def check_non_empty(
        cls, v: List[VideoInput | YoutubeVideoInput]
    ) -> List[VideoInput | YoutubeVideoInput]:
        """
        Check that the list of videos isn't empty.

        Registered with pre=True, so it receives the raw input before
        the items are parsed into models.

        Args:
            v (List[VideoInput | YoutubeVideoInput]): the list of videos

        Returns:
            List[VideoInput | YoutubeVideoInput]: the list of videos

        Raises:
            ValueError: if the list of videos is empty
        """
        if len(v) == 0:
            raise ValueError("The list of videos must not be empty.")
        return v

    def set_files(self, files: List[bytes]):
        """
        Set the files for the videos.

        Uploaded files can only be attached to VideoInput items. All items
        are checked before any file is attached, so a failure never leaves
        the list partially populated.

        Args:
            files (List[bytes]): the files uploaded to the endpoint

        Raises:
            ValidationError: if the number of videos and files aren't the same
            ValueError: if any item in the list is not a VideoInput
        """
        if len(self.__root__) != len(files):
            error = ErrorWrapper(
                ValueError("The number of videos and files must be the same."),
                loc=("videos",),
            )
            raise ValidationError([error], self.__class__)

        # Validate up front: raising mid-loop would leave earlier videos
        # already mutated while later ones are untouched.
        if not all(isinstance(video, VideoInput) for video in self.__root__):
            raise ValueError("The video must be a VideoInput.")

        for video, file in zip(self.__root__, files):
            video.set_file(file)

    def convert_input_to_object(self) -> List[VideoInput | YoutubeVideoInput]:
        """
        Convert the VideoOrYoutubeVideoInputList to a list of video inputs.

        Returns:
            List[VideoInput | YoutubeVideoInput]: the list of video inputs
        """
        return self.__root__

    class Config:
        schema_extra = {
            "description": (
                "A list of videos. \n"
                "Each item is either a video or a youtube video. \n"
                "For a video, exactly one of 'path', 'url', or 'content' must be provided. \n"
                "If 'path' is provided, the video will be loaded from the path. \n"
                "If 'url' is provided, the video will be downloaded from the url. \n"
                "The 'content' will be loaded automatically "
                "if files are uploaded to the endpoint (should be set to 'file' for that). \n"
                "For a youtube video, 'youtube_url' must be provided "
                "and the video will be downloaded from youtube."
            )
        }
        file_upload = True
        file_upload_description = "Upload video files."
45 changes: 45 additions & 0 deletions aana/tests/test_video.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from importlib import resources
from pathlib import Path
import pytest
from aana.configs.settings import settings
from aana.exceptions.general import DownloadException
from aana.models.core.video import Video
from aana.models.pydantic.video_input import VideoInput, YoutubeVideoInput
from aana.utils.video import download_youtube_video


@pytest.fixture
Expand Down Expand Up @@ -139,3 +143,44 @@ def test_at_least_one_input():

with pytest.raises(ValueError):
Video(save_on_disk=True)


def test_download_youtube_video():
    """
    Test that download_youtube_video works for both VideoInput and YoutubeVideoInput.

    Covers three cases: a local VideoInput, a YoutubeVideoInput that is
    expected to be saved under the temporary youtube_videos directory,
    and a YoutubeVideoInput whose download is expected to fail.
    """
    # Test VideoInput
    path = resources.path("aana.tests.files.videos", "squirrel.mp4")
    video_input = VideoInput(path=str(path))
    video = download_youtube_video(video_input)
    assert isinstance(video, Video)
    assert video.path == path
    assert video.content is None
    assert video.url is None

    # Test YoutubeVideoInput
    youtube_url = "https://www.youtube.com/watch?v=yModCU1OVHY"
    tmp_dir = settings.tmp_data_dir
    youtube_video_dir = tmp_dir / "youtube_videos"
    expected_path = youtube_video_dir / "yModCU1OVHY.mp4"
    # remove the file if it exists
    expected_path.unlink(missing_ok=True)

    # Use a dedicated variable for the downloaded video. Reusing `video`
    # would make the cleanup below delete the squirrel.mp4 fixture whenever
    # the download raises, because `video` would still point at the
    # local-file Video created above.
    youtube_video = None
    try:
        youtube_video_input = YoutubeVideoInput(youtube_url=youtube_url)
        youtube_video = download_youtube_video(youtube_video_input)
        assert isinstance(youtube_video, Video)
        assert youtube_video.path == expected_path
        assert youtube_video.path is not None
        assert youtube_video.path.exists()
        assert youtube_video.content is None
        assert youtube_video.url is None
    finally:
        # Always remove the expected download target, even on failure.
        expected_path.unlink(missing_ok=True)
        # Also remove the file if it was saved somewhere unexpected.
        if youtube_video is not None and youtube_video.path is not None:
            youtube_video.path.unlink(missing_ok=True)

    # Test YoutubeVideoInput with invalid youtube_url
    youtube_url = "https://www.youtube.com/watch?v=invalid_url"
    youtube_video_input = YoutubeVideoInput(youtube_url=youtube_url)
    with pytest.raises(DownloadException):
        download_youtube_video(youtube_video_input)
Loading

0 comments on commit 5e52de5

Please sign in to comment.