Skip to content

Commit

Permalink
feat: implement NewsVideoScriptGenerator and associated media suggest…
Browse files Browse the repository at this point in the history
…ion prompts
  • Loading branch information
leodiegues committed Nov 19, 2024
1 parent 72d166e commit 122a342
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 1 deletion.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -355,4 +355,3 @@ fabric.properties
!.idea/runConfigurations

# End of https://www.toptal.com/developers/gitignore/api/pycharm+all
src/mosaico/script_generators/news/*
4 changes: 4 additions & 0 deletions src/mosaico/script_generators/news/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from mosaico.script_generators.news.generator import NewsVideoScriptGenerator


# Names exported by ``from mosaico.script_generators.news import *``.
__all__ = ["NewsVideoScriptGenerator"]
147 changes: 147 additions & 0 deletions src/mosaico/script_generators/news/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from typing import TYPE_CHECKING, Any, Sequence

from pydantic import BaseModel
from pydantic_extra_types.language_code import LanguageAlpha2

from mosaico.assets.types import AssetType
from mosaico.media import Media
from mosaico.script_generators.news.prompts import (
MEDIA_SUGGESTING_PROMPT,
SHOOTING_SCRIPT_PROMPT,
SUMMARIZE_CONTEXT_PROMPT,
)
from mosaico.script_generators.script import ShootingScript


if TYPE_CHECKING:
from openai.types.chat import ChatCompletionMessageParam


class ParagraphMediaSuggestion(BaseModel):
    """A media suggestion for a paragraph."""

    # One entry pairs a single paragraph with the single media object picked for it.

    paragraph: str
    """Text of the paragraph this suggestion belongs to."""

    media_id: str
    """Identifier of the media object chosen for the paragraph."""

    type: AssetType
    """Asset type of the chosen media."""

    relevance: str
    """Explanation of why the media fits the paragraph."""


class ParagraphMediaSuggestions(BaseModel):
    """A list of media suggestions for paragraphs."""

    # Wrapper model so the whole set of suggestions can be requested as one structured response.

    suggestions: list[ParagraphMediaSuggestion]
    """All paragraph/media pairings returned in a single response."""


class NewsVideoScriptGenerator:
    """
    Generate a shooting script for a news video from a textual context.

    Generation runs three model calls in sequence: the context is summarized into paragraphs, one media
    object is suggested per paragraph, and the suggestions are turned into a shooting script.
    """

    def __init__(
        self,
        context: str,
        model: str = "gpt-4o",
        model_params: dict[str, Any] | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
        num_paragraphs: int = 5,
        language: str | LanguageAlpha2 | None = None,
        timeout: float = 120,
    ) -> None:
        """
        Configure the generator and build the completion client.

        :param context: The source text to summarize and turn into a script.
        :param model: The model name sent with every completion request.
        :param model_params: Extra completion parameters; ``{"temperature": 0}`` is used when this is
            ``None`` or empty.
        :param api_key: API key forwarded to the completion client.
        :param base_url: Base URL forwarded to the completion client.
        :param num_paragraphs: Number of paragraphs requested from the summarization step.
        :param language: Two-letter language code for the summary; ``"en"`` when ``None``.
        :param timeout: Timeout forwarded to the completion client.
        :raises ImportError: If ``instructor`` or ``litellm`` is not installed.
        """
        try:
            import instructor
            import litellm
        except ImportError as exc:
            # Chain the original error so the traceback shows which package failed to import.
            raise ImportError(
                "The 'instructor' and 'litellm' packages are required for using the NewsVideoScriptGenerator."
            ) from exc
        self.context = context
        self.model = model
        self.model_params = model_params or {"temperature": 0}
        self.num_paragraphs = num_paragraphs
        self.language = LanguageAlpha2(language) if language is not None else LanguageAlpha2("en")
        self.client = instructor.from_litellm(litellm.completion, api_key=api_key, base_url=base_url, timeout=timeout)

    def generate(self, media: Sequence[Media], **kwargs: Any) -> ShootingScript:
        """
        Generate a shooting script for the configured context with AI.

        :param media: The media objects available for illustrating the paragraphs.
        :param kwargs: Currently unused by this implementation.
        :return: The shooting script built from the summarized context and the suggested media.
        """
        paragraphs = self._summarize_context(self.context, self.num_paragraphs, self.language)
        suggestions = self._suggest_paragraph_media(paragraphs, media)
        return self._generate_shooting_script(suggestions)

    def _summarize_context(self, context: str, num_paragraphs: int, language: LanguageAlpha2) -> str:
        """
        Summarize the context into paragraphs.

        :param context: The text to summarize.
        :param num_paragraphs: Number of paragraphs to ask for.
        :param language: Language whose name is inserted into the prompt.
        :return: The summary as returned by the model; the completion is requested as ``str``, so the
            paragraphs arrive as one block of text rather than a list.
        """
        paragraphs_prompt = SUMMARIZE_CONTEXT_PROMPT.format(
            context=context, num_paragraphs=num_paragraphs, language=language.name
        )
        return self._fetch_completion(paragraphs_prompt, response_type=str)

    def _suggest_paragraph_media(self, paragraphs: str, media: Sequence[Media]) -> list[ParagraphMediaSuggestion]:
        """
        Suggest a media object for each paragraph.

        :param paragraphs: The summary text produced by ``_summarize_context``.
        :param media: The media objects the model may choose from.
        :return: The suggestions extracted from the structured response.
        """
        formatted_media = _build_media_string(media)
        prompt = MEDIA_SUGGESTING_PROMPT.format(paragraphs=paragraphs, media_objects=formatted_media)
        response = self._fetch_completion(prompt, response_type=ParagraphMediaSuggestions)
        return response.suggestions

    def _generate_shooting_script(self, suggestions: list[ParagraphMediaSuggestion]) -> ShootingScript:
        """
        Generate the shooting script from the paragraph media suggestions.

        :param suggestions: The paragraph/media pairings to base the script on.
        :return: The shooting script returned by the model.
        """
        prompt = SHOOTING_SCRIPT_PROMPT.format(suggestions=suggestions)
        return self._fetch_completion(prompt, response_type=ShootingScript)

    def _fetch_completion(
        self,
        user_message: str,
        system_message: str = "You are a helpful assistant.",
        *,
        response_type: type[Any],
        **kwargs: Any,
    ) -> Any:
        """
        Fetch a completion from the AI model.

        :param user_message: Content of the user message.
        :param system_message: Content of the system message.
        :param response_type: Type passed to the client as ``response_model``.
        :param kwargs: Per-call completion parameters; they take precedence over ``self.model_params``.
        :return: Whatever the client returns for the requested ``response_type``.
        """
        messages: list[ChatCompletionMessageParam] = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]
        # Right-hand operand wins on key clashes, so per-call kwargs override the instance defaults.
        model_params = self.model_params | kwargs
        return self.client.chat.completions.create(
            model=self.model, messages=messages, response_model=response_type, **model_params
        )


def _format_media(media: Media, index: int) -> str:
    """
    Render one media object as a labelled text entry.

    :param media: The media object whose id, MIME type and description are rendered.
    :param index: Position of the media in its sequence; accepted but not used in the output.
    :return: Three labelled lines followed by a blank line.
    """
    entry_lines = (
        f"Media ID: {media.id}",
        # A missing MIME type is reported as plain text.
        f"MIME type: {media.mime_type or 'text/plain'}",
        f"Description: {media.description}",
    )
    return "\n".join(entry_lines) + "\n\n"


def _build_media_string(medias: Sequence[Media]) -> str:
    """
    Concatenate the formatted entries of all media objects into one string.

    :param medias: The media objects to render, in the order they should appear.
    :return: The formatted entries back to back; an empty string when ``medias`` is empty.
    """
    # Join once instead of growing the string with += on every iteration.
    return "".join(_format_media(media, index) for index, media in enumerate(medias))
73 changes: 73 additions & 0 deletions src/mosaico/script_generators/news/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import textwrap


# Prompt template for the summarization step. It has three ``str.format`` placeholders:
# ``{context}`` (text to summarize), ``{num_paragraphs}`` (paragraph count requested) and
# ``{language}`` (language the paragraphs must be written in).
SUMMARIZE_CONTEXT_PROMPT = textwrap.dedent(
"""
INSTRUCTIONS:
You are a helpful news assistant tasked with summarizing the key points of the following context for a journalist
in paragraphs. Your summary should be concise, informative, and capture the most important details of the context.
The summary will be used by the journalist to produce a self-contained shooting script for an informative video
based on the context provided.
OUTPUT GUIDELINES:
- The summary should have {num_paragraphs} paragraphs.
- Each paragraph should be 1 sentence long.
- Adhere to the best practices of journalistic writing.
- Return only the paragraphs in {language} without any additional information.
CONTEXT:
{context}
SUMMARY:
"""
).strip()

# Prompt template for the media-selection step. It has two ``str.format`` placeholders:
# ``{paragraphs}`` (the summary paragraphs) and ``{media_objects}`` (the text listing of the
# media objects that may be selected).
MEDIA_SUGGESTING_PROMPT = textwrap.dedent(
"""
INSTRUCTIONS:
You are a helpful news assistant tasked with selecting media objects from the provided collection to enhance
the visual appeal and storytelling of an informative video. Your selections should be relevant, engaging, and
directly correspond to the content of each paragraph.
From the media objects provided, you will select items that best match the content of each paragraph. Your goal
is to choose media that will enhance the viewer's understanding and create a compelling visual narrative.
OUTPUT GUIDELINES:
- For each paragraph, select one media object from the provided collection
- Only select media objects that are available in the provided collection
- Avoid selecting the same media object for multiple paragraphs
- Answer only with the structured response format in the same language as the paragraphs
PARAGRAPHS:
{paragraphs}
AVAILABLE MEDIA OBJECTS:
{media_objects}
SUGGESTIONS:
"""
).strip()


# Prompt template for the final script-writing step. It has one ``str.format`` placeholder:
# ``{suggestions}`` (the paragraph/media pairings the script must be built from).
SHOOTING_SCRIPT_PROMPT = textwrap.dedent(
"""
INSTRUCTIONS:
You are an experienced journalist and scriptwriter tasked with creating a detailed shooting script for an
informative video based on the following paragraphs and media objects. Your script should suggest specific
shot, effects, and narration that effectively tell the story while incorporating the media assets.
The script should maintain journalistic standards of accuracy and objectivity while being engaging for viewers.
Make sure each suggested media object is thoughtfully integrated to enhance the narrative flow.
OUTPUT GUIDELINES:
- Provide a detailed shooting script that includes shots, effects, and timings.
- Use the paragraphs as subtitles for each shot. Keep them as they are.
- Respond only with the structured output format in the same language as the paragraphs.
PARAGRAPHS AND MEDIA OBJECTS SUGGESTIONS:
{suggestions}
SHOOTING SCRIPT:
"""
).strip()

0 comments on commit 122a342

Please sign in to comment.