Add custom art

an-bluecat · May 23, 2024 · 21d6f64 · 21d6f64
1 parent bbdb4b4
commit 21d6f64
Show file tree

Hide file tree

Showing 8 changed files with 116 additions and 55 deletions.
diff --git a/assets/sample.jpg b/assets/sample.jpg
diff --git a/generated_image.png b/generated_image.png
diff --git a/main.py b/main.py
@@ -10,12 +10,13 @@
 import re
 import difflib
 from utils.addMusic import add_bgm
-from utils.utils import spanish_title_case, english_title_case, get_day_of_week, get_upload_date
+from utils.utils import spanish_title_case, english_title_case, get_day_of_week, get_upload_date, compress_image_to_target_size
 import sys
-from newsLetter.newsletter import send_newsletter, extract_podcast_description, format_newsletter
+#from newsLetter.newsletter import send_newsletter, extract_podcast_description, format_newsletter
 from utils.uploadPodbean import upload_podcast_episode
 import json
 import pytz
+import requests
 
 
 # Setup basic configuration for logging
@@ -29,7 +30,7 @@
 RETRY_DELAY = 2  # seconds in case of retries
 PRODUCTION_MODE = True  # Set to True to enable audio file generation
 BGM_PATH = "assets/bgm.mp3"
-STATUS = "future" # can change to draft for testing
+STATUS = "draft" # can change to draft for testing
 TYPE = "public"
 pdt = pytz.timezone('America/Los_Angeles')
 
@@ -87,13 +88,7 @@ def get_top_news(self):
         grouped_sources = scrape_and_group_by_source(self.date)
         formatted_text = format_grouped_titles_by_source(grouped_sources)
 
-        input_ask = '''Suppose you are the chief editor at CNBC-TechCheck-Briefing. You need to select 5 most important news events to put into today's briefing(You might be able to see some hint by how many times a news event is reported, but also consider what your audience of CNBC-TechCheck-Briefing is interested in). Return the title of the event in order of importance for these unqiue events. Also, exclude these news events talked about yesterday:
-                microsoft wants to make windows an ai operating system, launches copilot+ pcs
-                scarlett johansson says openai ripped off her voice after she said the company can't use it
-                microsoft announces new pcs with ai chips from qualcomm
-                microsoft surface event: the 6 biggest announcements
-                in biometric 'breakthrough' year, you may soon start paying with your face
-
+        input_ask = '''Suppose you are the chief editor at CNBC-TechCheck-Briefing. You need to select 5 most important news events to put into today's briefing(You might be able to see some hint by how many times a news event is reported, but also consider what your audience of CNBC-TechCheck-Briefing is interested in). Return the title of the event in order of importance for these unqiue events.
             Here are the news of today:\n''' + formatted_text
         role = "Output the response as string titles in the seperated by newline. Each title should be exactly how it is in the news source."
 
@@ -240,7 +235,45 @@ def generate_podcast_title(self, transcript, language=None):
             output_response_prompt = f"Output the Title in {language}."
         input_ask = "Generate a title for this podcast. Must include three key topics (if there are many, choose the three most important ones). Incorporate emojis where appropriate. Pay attention to capitalization of titles. Follow the style of titles such as: Tesla Showcases FSD Demo 🚗, Adam Neuman's WeWork Bid 💰, CSV Conundrums 🖥️,Anthropic’s $4B Amazon Boost 💰, Brex's Valuation Leap to $12B 💳, Strategies for Success ✨,The OpenAI Voice Revolution 🗣️, AI Safety Measures 🦺, LLMs Go Mobile 📱. Here's the transcript excerpt: " + transcript + "\n" + output_response_prompt + "\nTitle:"
         return self.ask_gpt(input_ask)
-
+
+    def generate_podcast_cover(self, titles, dir):
+        """ Genearted podcast cover art from the provided titles. """
+
+        gpt_prompt = f'''
+        {titles}
+        Use these news titles, generate a description of image formed if I'm using these three titles to create a cover image for a podcast episode. I don't want any text in the image. 
+        Only output the description.
+        Example: 
+        A cover image with three main sections. On the left, a sleek, futuristic PC with the Microsoft logo and holographic elements showcases AI integration. On the right, an abstract representation of Scarlett Johansson and the OpenAI logo, suggesting a confrontation or debate with stylized silhouettes or digital avatars. At the bottom, a hand holding a smartphone with a facial recognition interface and payment confirmation screen, surrounded by symbols of digital transactions like credit card icons or currency symbols. The background features digital circuits, holographic effects, and vibrant colors.  
+        '''
+        image_prompt = self.ask_gpt(
+            input_ask=gpt_prompt,
+        )
+
+        response = self.openai_client.images.generate(
+            model="dall-e-3",
+            prompt=image_prompt,
+            size="1024x1024",
+            quality="standard",
+            n=1,
+        )
+
+        image_url = response.data[0].url
+        print(f"Generated image URL: {image_url}")
+
+        image_response = requests.get(image_url)
+
+        # Check if the request was successful
+        if image_response.status_code == 200:
+            with open(f'{dir}generated_image.jpeg', 'wb') as file:
+                file.write(image_response.content)
+            print("Image downloaded successfully!")
+            compress_image_to_target_size(f'{dir}generated_image.jpeg', 1)
+        else:
+            print("Failed to download the image")
+
+        return image_prompt,f'{dir}generated_image.jpeg'
+
 
 def remove_leading_numbers(lst):
     # This regular expression matches any leading numbers followed by a dot and any amount of whitespace
@@ -296,8 +329,9 @@ def remove_leading_numbers(lst):
         podcast_description = orchestrator.generate_podcast_description(
             polished_script)
         podcast_title = episode_number + \
-            english_title_case(
-                orchestrator.generate_podcast_title(polished_script))
+                english_title_case(orchestrator.generate_podcast_title(polished_script))
+
+        image_prompt, podcast_cover = orchestrator.generate_podcast_cover(podcast_title, output_directory)  
             # Text to Speech for each language, including the original English
         if PRODUCTION_MODE:
             for language, cur_script in [('English', polished_script)]:
@@ -309,6 +343,7 @@ def remove_leading_numbers(lst):
                         f"Podcast in {language} completed successfully. Audio file at: {audio_file_path}")
                 else:
                     logging.error(f"Failed to generate {language} audio file.")
+
 
             # Prepare the output text data
             # output_data = f"Titles:\n{chr(10).join(titles)}\n\ntop_news_prompt: {top_news_prompt}\n\nTop News:\n{chr(10).join(top_news)}\n\nGenerate_scipt_prompt:\n{generate_script_prompt}\n\nScript:\n{script}\n\npolished_script:\n{polished_script}\n\nPodcast Title:\n{podcast_title}\n\npodcast_description:\n{podcast_description}\n"
@@ -321,7 +356,8 @@ def remove_leading_numbers(lst):
             "Script": script,
             "Polished Script": polished_script,
             "Podcast Title": podcast_title,
-            "Podcast Description": podcast_description
+            "Podcast Description": podcast_description,
+            "Image Prompt": image_prompt,
         }
 
         # Define the output file path
@@ -336,8 +372,8 @@ def remove_leading_numbers(lst):
 
         file_path = f"{output_directory}English_final_podcast.mp3"
 
-        print(publish_unix)
-        upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, file_path, podcast_title, podcast_description, STATUS, TYPE, episode_prefix, publish_unix)
+       # print(publish_unix)
+        upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, file_path, podcast_cover, podcast_title,podcast_description, STATUS, TYPE, episode_prefix, publish_unix)
 
 
     else:

diff --git a/output/2024-05-22/English_final_podcast.mp3 b/output/2024-05-22/English_final_podcast.mp3
diff --git a/output/2024-05-22/generated_image.jpeg b/output/2024-05-22/generated_image.jpeg
diff --git a/test.py b/test.py
@@ -1,27 +1,11 @@
-from newsScraper import scrape_cnbctech, is_today, scrape_verge, scrape_techcrunch
-from datetime import datetime
+import openai
 import requests
-from bs4 import BeautifulSoup
-import re
-import datetime
-from utils.utils import get_day_of_week, get_next_weekday, get_upload_date, spanish_title_case, english_title_case
+import base64
 
-today =   datetime.date.today()
+from openai import OpenAI
+from dotenv import load_dotenv
+import os
+from utils.utils import spanish_title_case, english_title_case
 
 
-def test_scrape_cnbctech():
-    url = 'https://techcrunch.com/'
-    response = requests.get(url)
-    soup = BeautifulSoup(response.text, 'html.parser')
-    data_links = soup.find_all('a', attrs={'data-destinationlink': True})
-
-    # Debugging: print the number of data_links found
-    print(f"Number of data links found: {len(data_links)}")
-
-    # Extract the href attributes and text of these links
-    articles = [[link.text.strip(), link['href']] for link in data_links if is_today(link['href'], current_dat) and len(link.text.strip()) > 0]
-    print(articles)
-    return articles
-
-
-print(get_upload_date('2024-05-21'))
+print(english_title_case("🔜 Nvidia’s new annual ai chip rollout 🧠, Amazon alexa’s ai overhaul with subscription 🎙️, microsoft ties executive pay to cybersecurity 💼"))
diff --git a/utils/uploadPodbean.py b/utils/uploadPodbean.py
@@ -12,9 +12,11 @@
 CONTENT = "Time you <b>enjoy</b> wasting, wdsfas not wasted."  # description
 STATUS = "draft"
 TYPE = "public"
-FILE_NAME = "output/2024-05-16/English_final_podcast.mp3"
+PODCAST_FILE_NAME = "output/2024-05-16/English_final_podcast.mp3"
+PODCAST_COVER_ART = "assets/sample.jpg"
 
-def upload_podcast_episode(client_id, client_secret, file_name, title, content, status, type_, episode_number=None, publish_timestamp=None):
+
+def upload_podcast_episode(client_id, client_secret, podcast_file_name, podcast_cover_art, title, content, status, type_, episode_number=None, publish_timestamp=None):
     # Step 1: Obtain OAuth token
     def get_oauth_token(client_id, client_secret):
         url = 'https://api.podbean.com/v1/oauth/token'
@@ -34,7 +36,7 @@ def get_file_size(filename):
         return os.path.getsize(filename)
 
     # Step 3: Get upload authorization and presigned URL
-    def get_upload_authorization(access_token, filename):
+    def get_upload_authorization(access_token, filename, content_type='audio/mpeg'):
         url = 'https://api.podbean.com/v1/files/uploadAuthorize'
 
         absolute_path = os.path.abspath(filename)
@@ -49,7 +51,7 @@ def get_upload_authorization(access_token, filename):
             'access_token': access_token,
             'filename': os.path.basename(absolute_path),
             'filesize': filesize,
-            'content_type': 'audio/mpeg'
+            'content_type': content_type
         }
         headers = {'User-Agent': USER_AGENT}
 
@@ -64,19 +66,22 @@ def get_upload_authorization(access_token, filename):
             return None, None
 
     # Step 4: Upload file using presigned URL
-    def upload_file_to_presigned_url(presigned_url, filename):
-        headers = {'Content-Type': 'audio/mpeg'}
+    def upload_file_to_presigned_url(presigned_url, filename, content_type='audio/mpeg'):
+        if presigned_url is None:
+            print("Presigned URL is missing.")
+            return False
+        headers = {'Content-Type': content_type}
         with open(filename, 'rb') as f:
             response = requests.put(presigned_url, data=f, headers=headers)
         if response.status_code == 200:
-            print("File successfully uploaded.")
+            print(f"{content_type}: File successfully uploaded.")
             return True
         else:
-            print("Failed to upload file:", response.status_code, response.text)
+            print(f"{content_type}: Failed to upload file:", response.status_code, response.text)
             return False
 
     # Step 5: Publish episode
-    def publish_episode(access_token, title, content, status, type_, media_key, episode_number=None, publish_timestamp=None):
+    def publish_episode(access_token, title, content, status, type_, media_key, logo_key, episode_number=None, publish_timestamp=None):
         url = 'https://api.podbean.com/v1/episodes'
         headers = {
             'User-Agent': USER_AGENT,
@@ -88,6 +93,7 @@ def publish_episode(access_token, title, content, status, type_, media_key, epis
             'status': status,
             'type': type_,
             'media_key': media_key,
+            'logo_key': logo_key,
             'episode_number':  episode_number,
             'publish_timestamp': publish_timestamp
         }
@@ -108,13 +114,20 @@ def publish_episode(access_token, title, content, status, type_, media_key, epis
 
     access_token = get_oauth_token(client_id, client_secret)
     if access_token:
-        presigned_url, media_key = get_upload_authorization(access_token, file_name)
-        if presigned_url and media_key:
-            if upload_file_to_presigned_url(presigned_url, file_name):
-                publish_episode(access_token, title, content, status, type_, media_key, episode_number, publish_timestamp)
-
+        presigned_url, media_key = get_upload_authorization(access_token, podcast_file_name, content_type='audio/mpeg')
+        presigned_url2, logo_key = get_upload_authorization(access_token, podcast_cover_art, content_type='image/jpg')
+
+        if presigned_url and media_key and presigned_url2 and logo_key:
+            if upload_file_to_presigned_url(presigned_url, podcast_file_name, content_type='audio/mpeg') and upload_file_to_presigned_url(presigned_url2, podcast_cover_art, content_type='image/jpg'):
+                publish_episode(access_token, title, content, status, type_, media_key,logo_key, episode_number, publish_timestamp)
+            else: 
+                print("Failed to upload file or get presigned URL.")
+        else:
+            print("Failed to upload file or get presigned URL.")
+
+
 if __name__ == '__main__':
     CLIENT_ID = os.getenv("PODBEAN_CLIENT_ID")
     CLIENT_SECRET = os.getenv("PODBEAN_CLIENT_SECRET")
 
-    upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, FILE_NAME, TITLE, CONTENT, STATUS, TYPE, 24, 1670000000)
+    upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, PODCAST_FILE_NAME,PODCAST_COVER_ART, TITLE, CONTENT, STATUS, TYPE, 24)
diff --git a/utils/utils.py b/utils/utils.py
@@ -1,6 +1,9 @@
 from datetime import datetime
 from datetime import datetime, timedelta
 import pytz
+from PIL import Image
+import os
+
 
 def get_day_of_week(date):
     date_obj = datetime.strptime(date, '%Y-%m-%d')
@@ -67,4 +70,29 @@ def english_title_case(text):
             new_title.append(word.lower())
         else:
             new_title.append(word.capitalize())
-    return ' '.join(new_title)
+    return ' '.join(new_title)
+
+
+def compress_image_to_target_size(input_path, target_size_mb, initial_quality=85, step=5):
+    """
+    Compresses an image to ensure its size is below a target size in MB, overwriting the original image.
+
+    :param input_path: Path to the input image.
+    :param target_size_mb: Target size in MB.
+    :param initial_quality: Initial quality for compression.
+    :param step: Step to reduce quality in each iteration.
+    """
+    target_size_bytes = target_size_mb * 1024 * 1024
+    quality = initial_quality
+
+    with Image.open(input_path) as img:
+        while True:
+            img.save(input_path, 'JPEG', quality=quality)
+            output_size = os.path.getsize(input_path)
+
+            if output_size <= target_size_bytes or quality <= step:
+                break
+
+            quality -= step
+            if quality <= 0:
+                raise ValueError("Cannot compress image to the desired size.")