diff --git a/.gitignore b/.gitignore index 7c7fbe3..fb8a5c0 100644 --- a/.gitignore +++ b/.gitignore @@ -355,4 +355,3 @@ fabric.properties !.idea/runConfigurations # End of https://www.toptal.com/developers/gitignore/api/pycharm+all -src/mosaico/script_generators/news/* diff --git a/src/mosaico/script_generators/news/__init__.py b/src/mosaico/script_generators/news/__init__.py new file mode 100644 index 0000000..aedee97 --- /dev/null +++ b/src/mosaico/script_generators/news/__init__.py @@ -0,0 +1,4 @@ +from mosaico.script_generators.news.generator import NewsVideoScriptGenerator + + +__all__ = ["NewsVideoScriptGenerator"] diff --git a/src/mosaico/script_generators/news/generator.py b/src/mosaico/script_generators/news/generator.py new file mode 100644 index 0000000..e3b1222 --- /dev/null +++ b/src/mosaico/script_generators/news/generator.py @@ -0,0 +1,147 @@ +from typing import TYPE_CHECKING, Any, Sequence + +from pydantic import BaseModel +from pydantic_extra_types.language_code import LanguageAlpha2 + +from mosaico.assets.types import AssetType +from mosaico.media import Media +from mosaico.script_generators.news.prompts import ( + MEDIA_SUGGESTING_PROMPT, + SHOOTING_SCRIPT_PROMPT, + SUMMARIZE_CONTEXT_PROMPT, +) +from mosaico.script_generators.script import ShootingScript + + +if TYPE_CHECKING: + from openai.types.chat import ChatCompletionMessageParam + + +class ParagraphMediaSuggestion(BaseModel): + """A media suggestion for a paragraph.""" + + paragraph: str + """The paragraph content to which the media object corresponds.""" + + media_id: str + """The media reference for the shot.""" + + type: AssetType + """The type of media (image, video, or audio).""" + + relevance: str + """How it relates to the specific paragraph.""" + + +class ParagraphMediaSuggestions(BaseModel): + """A list of media suggestions for paragraphs.""" + + suggestions: list[ParagraphMediaSuggestion] + """The list of paragraph media suggestions.""" + + +class NewsVideoScriptGenerator: + def __init__( + self, + context: str, + model: str = "gpt-4o", + model_params: dict[str, Any] | None = None, + api_key: str | None = None, + base_url: str | None = None, + num_paragraphs: int = 5, + language: str | LanguageAlpha2 | None = None, + timeout: float = 120, + ) -> None: + try: + import instructor + import litellm + except ImportError: + raise ImportError( + "The 'instructor' and 'litellm' packages are required for using the NewsVideoScriptGenerator." + ) + self.context = context + self.model = model + self.model_params = model_params or {"temperature": 0} + self.num_paragraphs = num_paragraphs + self.language = LanguageAlpha2(language) if language is not None else LanguageAlpha2("en") + self.client = instructor.from_litellm(litellm.completion, api_key=api_key, base_url=base_url, timeout=timeout) + + def generate(self, media: Sequence[Media], **kwargs: Any) -> ShootingScript: + """ + Generate scenes for a project with AI. + + :param media: The list of media objects. + :param kwargs: Additional context for the scene generation. + :return: A tuple containing the scenes and assets generated from the media files. + """ + paragraphs = self._summarize_context(self.context, self.num_paragraphs, self.language) + suggestions = self._suggest_paragraph_media(paragraphs, media) + shooting_script = self._generate_shooting_script(suggestions) + return shooting_script + + def _summarize_context(self, context: str, num_paragraphs: int, language: LanguageAlpha2) -> list[str]: + """ + Summarize the context to provide a brief overview of the article. + """ + paragraphs_prompt = SUMMARIZE_CONTEXT_PROMPT.format( + context=context, num_paragraphs=num_paragraphs, language=language.name + ) + paragraphs = self._fetch_completion(paragraphs_prompt, response_type=str) + return paragraphs + + def _suggest_paragraph_media(self, paragraphs: list[str], media: Sequence[Media]) -> list[ParagraphMediaSuggestion]: + """ + Suggest media usage based on the media objects. + """ + formatted_media = _build_media_string(media) + prompt = MEDIA_SUGGESTING_PROMPT.format(paragraphs=paragraphs, media_objects=formatted_media) + suggestions = self._fetch_completion(prompt, response_type=ParagraphMediaSuggestions) + return suggestions.suggestions + + def _generate_shooting_script(self, suggestions: list[ParagraphMediaSuggestion]) -> ShootingScript: + """ + Generate the shooting script. + """ + prompt = SHOOTING_SCRIPT_PROMPT.format(suggestions=suggestions) + shooting_script = self._fetch_completion(prompt, response_type=ShootingScript) + return shooting_script + + def _fetch_completion( + self, + user_message: str, + system_message: str = "You are a helpful assistant.", + *, + response_type: type[Any], + **kwargs: Any, + ) -> Any: + """ + Fetch a completion from the AI model. + """ + messages: list[ChatCompletionMessageParam] = [ + {"role": "system", "content": system_message}, + {"role": "user", "content": user_message}, + ] + model_params = self.model_params | kwargs + return self.client.chat.completions.create( + model=self.model, messages=messages, response_model=response_type, **model_params + ) + + +def _format_media(media: Media, index: int) -> str: + """ + Format a media object as a string for display. + """ + description = media.description + mime_type = media.mime_type or "text/plain" + return f"Media ID: {media.id}\nMIME type: {mime_type}\nDescription: {description}\n\n" + + +def _build_media_string(medias: Sequence[Media]) -> str: + """ + Build context and media strings for generating a script. + """ + media_str = "" + for index, media in enumerate(medias): + fmt_media = _format_media(media, index) + media_str += fmt_media + return media_str diff --git a/src/mosaico/script_generators/news/prompts.py b/src/mosaico/script_generators/news/prompts.py new file mode 100644 index 0000000..1282dc9 --- /dev/null +++ b/src/mosaico/script_generators/news/prompts.py @@ -0,0 +1,73 @@ +import textwrap + + +SUMMARIZE_CONTEXT_PROMPT = textwrap.dedent( + """ + INSTRUCTIONS: + You are a helpful news assistant tasked with summarizing the key points of the following context for a journalist + in paragraphs. Your summary should be concise, informative, and capture the most important details of the context. + The summary will be used by the journalist to produce a self-contained shooting script for an informative video + based on the context provided. + + + OUTPUT GUIDELINES: + - The summary should have {num_paragraphs} paragraphs. + - Each paragraph should be 1 sentence long. + - Adhere to the best practices of journalistic writing. + - Return only the paragraphs in {language} without any additional information. + + CONTEXT: + {context} + + SUMMARY: + """ +).strip() + +MEDIA_SUGGESTING_PROMPT = textwrap.dedent( + """ + INSTRUCTIONS: + You are a helpful news assistant tasked with selecting media objects from the provided collection to enhance + the visual appeal and storytelling of an informative video. Your selections should be relevant, engaging, and + directly correspond to the content of each paragraph. + + From the media objects provided, you will select items that best match the content of each paragraph. Your goal + is to choose media that will enhance the viewer's understanding and create a compelling visual narrative. + + OUTPUT GUIDELINES: + - For each paragraph, select one media object from the provided collection + - Only select media objects that are available in the provided collection + - Avoid selecting the same media object for multiple paragraphs + - Answer only with the structured response format in the same language as the paragraphs + + PARAGRAPHS: + {paragraphs} + + AVAILABLE MEDIA OBJECTS: + {media_objects} + + SUGGESTIONS: + """ +).strip() + + +SHOOTING_SCRIPT_PROMPT = textwrap.dedent( + """ + INSTRUCTIONS: + You are an experienced journalist and scriptwriter tasked with creating a detailed shooting script for an + informative video based on the following paragraphs and media objects. Your script should suggest specific + shot, effects, and narration that effectively tell the story while incorporating the media assets. + + The script should maintain journalistic standards of accuracy and objectivity while being engaging for viewers. + Make sure each suggested media object is thoughtfully integrated to enhance the narrative flow. + + OUTPUT GUIDELINES: + - Provide a detailed shooting script that includes shots, effects, and timings. + - Use the paragraphs as subtitles for each shot. Keep them as they are. + - Respond only with the structured output format in the same language as the paragraphs. + + PARAGRAPHS AND MEDIA OBJECTS SUGGESTIONS: + {suggestions} + + SHOOTING SCRIPT: + """ +).strip()