Merge pull request #8 from FolhaSP/hotfix/news-video

hotfix: remove news video script generator from gitignore
FolhaSP · Nov 19, 2024 · c99697c · c99697c
2 parents 72d166e + 122a342
commit c99697c
Show file tree

Hide file tree

Showing 4 changed files with 224 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -355,4 +355,3 @@ fabric.properties
 !.idea/runConfigurations
 
 # End of https://www.toptal.com/developers/gitignore/api/pycharm+all
-src/mosaico/script_generators/news/*
diff --git a/src/mosaico/script_generators/news/__init__.py b/src/mosaico/script_generators/news/__init__.py
@@ -0,0 +1,4 @@
+from mosaico.script_generators.news.generator import NewsVideoScriptGenerator
+
+
+__all__ = ["NewsVideoScriptGenerator"]
diff --git a/src/mosaico/script_generators/news/generator.py b/src/mosaico/script_generators/news/generator.py
@@ -0,0 +1,147 @@
+from typing import TYPE_CHECKING, Any, Sequence
+
+from pydantic import BaseModel
+from pydantic_extra_types.language_code import LanguageAlpha2
+
+from mosaico.assets.types import AssetType
+from mosaico.media import Media
+from mosaico.script_generators.news.prompts import (
+    MEDIA_SUGGESTING_PROMPT,
+    SHOOTING_SCRIPT_PROMPT,
+    SUMMARIZE_CONTEXT_PROMPT,
+)
+from mosaico.script_generators.script import ShootingScript
+
+
+if TYPE_CHECKING:
+    from openai.types.chat import ChatCompletionMessageParam
+
+
+class ParagraphMediaSuggestion(BaseModel):
+    """A media suggestion for a paragraph."""
+
+    # One item of the structured output requested from the model: instances are
+    # reached through ParagraphMediaSuggestions.suggestions, which is the response
+    # type used by NewsVideoScriptGenerator._suggest_paragraph_media.
+    # NOTE(review): pydantic may surface the class docstring in the generated
+    # schema, so treat the docstring text as part of the prompt contract - confirm
+    # before rewording it.
+
+    # The summary paragraph this suggestion belongs to.
+    paragraph: str
+    """The paragraph content to which the media object corresponds."""
+
+    # Presumably one of the "Media ID" values listed in the prompt - verify
+    # against the media collection passed by the caller.
+    media_id: str
+    """The media reference for the shot."""
+
+    # Asset type of the suggested media, constrained to AssetType.
+    type: AssetType
+    """The type of media (image, video, or audio)."""
+
+    # Free-text justification produced by the model.
+    relevance: str
+    """How it relates to the specific paragraph."""
+
+
+class ParagraphMediaSuggestions(BaseModel):
+    """A list of media suggestions for paragraphs."""
+
+    # Wrapper model passed as the response type in
+    # NewsVideoScriptGenerator._suggest_paragraph_media; only the inner list is
+    # returned to the rest of the pipeline.
+
+    # Suggestions as returned by the model.
+    suggestions: list[ParagraphMediaSuggestion]
+    """The list of paragraph media suggestions."""
+
+
+class NewsVideoScriptGenerator:
+    """
+    Generate a shooting script for a news video from a text context using an LLM.
+
+    Generation runs three chained completions: the context is summarized into
+    paragraphs, a media object is suggested for each paragraph, and the
+    suggestions are turned into a shooting script.
+    """
+
+    def __init__(
+        self,
+        context: str,
+        model: str = "gpt-4o",
+        model_params: dict[str, Any] | None = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        num_paragraphs: int = 5,
+        language: str | LanguageAlpha2 | None = None,
+        timeout: float = 120,
+    ) -> None:
+        """
+        Initialize the generator and its completion client.
+
+        :param context: The text the script is generated from.
+        :param model: The model name sent with every completion request.
+        :param model_params: Extra parameters sent with every completion request.
+            Defaults to ``{"temperature": 0}`` when omitted or empty.
+        :param api_key: The API key forwarded to the completion client.
+        :param base_url: The base URL forwarded to the completion client.
+        :param num_paragraphs: The number of paragraphs requested in the summary.
+        :param language: The language code for the summary. Defaults to ``"en"``.
+        :param timeout: The timeout forwarded to the completion client.
+        :raises ImportError: If ``instructor`` or ``litellm`` cannot be imported.
+        """
+        # Imported lazily so these packages are only required when this
+        # generator is actually instantiated.
+        try:
+            import instructor
+            import litellm
+        except ImportError as exc:
+            # Chain the original error so the name of the missing module is kept.
+            raise ImportError(
+                "The 'instructor' and 'litellm' packages are required for using the NewsVideoScriptGenerator."
+            ) from exc
+        self.context = context
+        self.model = model
+        self.model_params = model_params or {"temperature": 0}
+        self.num_paragraphs = num_paragraphs
+        self.language = LanguageAlpha2(language) if language is not None else LanguageAlpha2("en")
+        self.client = instructor.from_litellm(litellm.completion, api_key=api_key, base_url=base_url, timeout=timeout)
+
+    def generate(self, media: Sequence[Media], **kwargs: Any) -> ShootingScript:
+        """
+        Generate a shooting script for the configured context with AI.
+
+        :param media: The list of media objects available to illustrate the script.
+        :param kwargs: Additional keyword arguments. Currently unused by this generator.
+        :return: The shooting script generated from the context and the media objects.
+        """
+        paragraphs = self._summarize_context(self.context, self.num_paragraphs, self.language)
+        suggestions = self._suggest_paragraph_media(paragraphs, media)
+        return self._generate_shooting_script(suggestions)
+
+    def _summarize_context(self, context: str, num_paragraphs: int, language: LanguageAlpha2) -> str:
+        """
+        Summarize the context to provide a brief overview of the article.
+
+        :param context: The text to summarize.
+        :param num_paragraphs: The number of paragraphs requested in the summary.
+        :param language: The language of the summary; its ``name`` is inserted in the prompt.
+        :return: The summary paragraphs as returned by the model.
+        """
+        paragraphs_prompt = SUMMARIZE_CONTEXT_PROMPT.format(
+            context=context, num_paragraphs=num_paragraphs, language=language.name
+        )
+        # The completion is requested with response_type=str, so the paragraphs are
+        # handled as one block of text rather than a list.
+        # NOTE(review): confirm against the client's handling of simple response types.
+        return self._fetch_completion(paragraphs_prompt, response_type=str)
+
+    def _suggest_paragraph_media(self, paragraphs: str, media: Sequence[Media]) -> list[ParagraphMediaSuggestion]:
+        """
+        Suggest media usage based on the media objects.
+
+        :param paragraphs: The summary paragraphs, inserted in the prompt as they are.
+        :param media: The media objects the model may choose from.
+        :return: The media suggestions returned by the model.
+        """
+        formatted_media = _build_media_string(media)
+        prompt = MEDIA_SUGGESTING_PROMPT.format(paragraphs=paragraphs, media_objects=formatted_media)
+        suggestions = self._fetch_completion(prompt, response_type=ParagraphMediaSuggestions)
+        return suggestions.suggestions
+
+    def _generate_shooting_script(self, suggestions: list[ParagraphMediaSuggestion]) -> ShootingScript:
+        """
+        Generate the shooting script.
+
+        :param suggestions: The paragraph media suggestions, inserted in the prompt as they are.
+        :return: The shooting script returned by the model.
+        """
+        prompt = SHOOTING_SCRIPT_PROMPT.format(suggestions=suggestions)
+        return self._fetch_completion(prompt, response_type=ShootingScript)
+
+    def _fetch_completion(
+        self,
+        user_message: str,
+        system_message: str = "You are a helpful assistant.",
+        *,
+        response_type: type[Any],
+        **kwargs: Any,
+    ) -> Any:
+        """
+        Fetch a completion from the AI model.
+
+        :param user_message: The content of the user message.
+        :param system_message: The content of the system message.
+        :param response_type: The type passed to the client as ``response_model``.
+        :param kwargs: Per-call parameters; they take precedence over ``model_params``.
+        :return: Whatever the client returns for the requested response type.
+        """
+        messages: list[ChatCompletionMessageParam] = [
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": user_message},
+        ]
+        # The union builds a new dict, so self.model_params is never mutated.
+        model_params = self.model_params | kwargs
+        return self.client.chat.completions.create(
+            model=self.model, messages=messages, response_model=response_type, **model_params
+        )
+
+
+def _format_media(media: Media, index: int) -> str:
+    """
+    Render one media object as a text entry with its ID, MIME type and description.
+
+    :param media: The media object to render.
+    :param index: The position of the media object; not used in the output.
+    :return: The formatted entry, terminated by a blank line.
+    """
+    description = media.description
+    mime_type = media.mime_type
+    if not mime_type:
+        # Fall back to plain text when the media carries no MIME type.
+        mime_type = "text/plain"
+    return f"Media ID: {media.id}\nMIME type: {mime_type}\nDescription: {description}\n\n"
+
+
+def _build_media_string(medias: Sequence[Media]) -> str:
+    """
+    Concatenate the formatted entries of all media objects into a single string.
+
+    :param medias: The media objects to format, in the order they should appear.
+    :return: The formatted entries joined together; an empty string when there is no media.
+    """
+    # str.join avoids the repeated string copies of building the result with "+=".
+    return "".join(_format_media(media, index) for index, media in enumerate(medias))
diff --git a/src/mosaico/script_generators/news/prompts.py b/src/mosaico/script_generators/news/prompts.py
@@ -0,0 +1,73 @@
+import textwrap
+
+
+# Prompt template for the summarization step.
+# Placeholders: {num_paragraphs}, {language}, {context}.
+# dedent() removes the common indentation of the literal and strip() drops the
+# leading and trailing blank lines, so the text starts at "INSTRUCTIONS:".
+SUMMARIZE_CONTEXT_PROMPT = textwrap.dedent(
+    """
+    INSTRUCTIONS:
+    You are a helpful news assistant tasked with summarizing the key points of the following context for a journalist
+    in paragraphs. Your summary should be concise, informative, and capture the most important details of the context.
+    The summary will be used by the journalist to produce a self-contained shooting script for an informative video
+    based on the context provided.
+
+
+    OUTPUT GUIDELINES:
+    - The summary should have {num_paragraphs} paragraphs.
+    - Each paragraph should be 1 sentence long.
+    - Adhere to the best practices of journalistic writing.
+    - Return only the paragraphs in {language} without any additional information.
+
+    CONTEXT:
+    {context}
+
+    SUMMARY:
+    """
+).strip()
+
+# Prompt template for the media selection step.
+# Placeholders: {paragraphs}, {media_objects}.
+MEDIA_SUGGESTING_PROMPT = textwrap.dedent(
+    """
+    INSTRUCTIONS:
+    You are a helpful news assistant tasked with selecting media objects from the provided collection to enhance
+    the visual appeal and storytelling of an informative video. Your selections should be relevant, engaging, and
+    directly correspond to the content of each paragraph.
+
+    From the media objects provided, you will select items that best match the content of each paragraph. Your goal
+    is to choose media that will enhance the viewer's understanding and create a compelling visual narrative.
+
+    OUTPUT GUIDELINES:
+    - For each paragraph, select one media object from the provided collection
+    - Only select media objects that are available in the provided collection
+    - Avoid selecting the same media object for multiple paragraphs
+    - Answer only with the structured response format in the same language as the paragraphs
+
+    PARAGRAPHS:
+    {paragraphs}
+
+    AVAILABLE MEDIA OBJECTS:
+    {media_objects}
+
+    SUGGESTIONS:
+    """
+).strip()
+
+
+# Prompt template for the final shooting script step.
+# Placeholder: {suggestions}.
+# NOTE(review): "specific shot, effects, and narration" below probably means
+# "shots"; left untouched because the text is part of the prompt at runtime.
+SHOOTING_SCRIPT_PROMPT = textwrap.dedent(
+    """
+    INSTRUCTIONS:
+    You are an experienced journalist and scriptwriter tasked with creating a detailed shooting script for an
+    informative video based on the following paragraphs and media objects. Your script should suggest specific
+    shot, effects, and narration that effectively tell the story while incorporating the media assets.
+
+    The script should maintain journalistic standards of accuracy and objectivity while being engaging for viewers.
+    Make sure each suggested media object is thoughtfully integrated to enhance the narrative flow.
+
+    OUTPUT GUIDELINES:
+    - Provide a detailed shooting script that includes shots, effects, and timings.
+    - Use the paragraphs as subtitles for each shot. Keep them as they are.
+    - Respond only with the structured output format in the same language as the paragraphs.
+
+    PARAGRAPHS AND MEDIA OBJECTS SUGGESTIONS:
+    {suggestions}
+
+    SHOOTING SCRIPT:
+    """
+).strip()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -355,4 +355,3 @@ fabric.properties
		!.idea/runConfigurations

		# End of https://www.toptal.com/developers/gitignore/api/pycharm+all
		-src/mosaico/script_generators/news/*
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@
		+from mosaico.script_generators.news.generator import NewsVideoScriptGenerator
		+
		+
		+__all__ = ["NewsVideoScriptGenerator"]