diff --git a/assets/sample.jpg b/assets/sample.jpg new file mode 100644 index 0000000..36d95c4 Binary files /dev/null and b/assets/sample.jpg differ diff --git a/generated_image.png b/generated_image.png new file mode 100644 index 0000000..a2b3e0e Binary files /dev/null and b/generated_image.png differ diff --git a/main.py b/main.py index 3e65850..0929e8e 100644 --- a/main.py +++ b/main.py @@ -10,12 +10,13 @@ import re import difflib from utils.addMusic import add_bgm -from utils.utils import spanish_title_case, english_title_case, get_day_of_week, get_upload_date +from utils.utils import spanish_title_case, english_title_case, get_day_of_week, get_upload_date, compress_image_to_target_size import sys -from newsLetter.newsletter import send_newsletter, extract_podcast_description, format_newsletter +#from newsLetter.newsletter import send_newsletter, extract_podcast_description, format_newsletter from utils.uploadPodbean import upload_podcast_episode import json import pytz +import requests # Setup basic configuration for logging @@ -29,7 +30,7 @@ RETRY_DELAY = 2 # seconds in case of retries PRODUCTION_MODE = True # Set to True to enable audio file generation BGM_PATH = "assets/bgm.mp3" -STATUS = "future" # can change to draft for testing +STATUS = "draft" # can change to draft for testing TYPE = "public" pdt = pytz.timezone('America/Los_Angeles') @@ -87,13 +88,7 @@ def get_top_news(self): grouped_sources = scrape_and_group_by_source(self.date) formatted_text = format_grouped_titles_by_source(grouped_sources) - input_ask = '''Suppose you are the chief editor at CNBC-TechCheck-Briefing. You need to select 5 most important news events to put into today's briefing(You might be able to see some hint by how many times a news event is reported, but also consider what your audience of CNBC-TechCheck-Briefing is interested in). Return the title of the event in order of importance for these unqiue events. 
def generate_podcast_cover(self, titles, dir):
    """Generate podcast cover art from the provided titles.

    The titles are first turned into a text-free image description via
    ``self.ask_gpt``; that description is rendered with DALL-E 3 through
    ``self.openai_client``; the resulting image is downloaded to
    ``{dir}generated_image.jpeg`` and passed to
    ``compress_image_to_target_size(path, 1)``.

    :param titles: Title text the cover should illustrate.
    :param dir: Output directory prefix. It is concatenated directly with the
        file name, so it must already end with a path separator. (The name
        shadows the ``dir`` builtin; it is kept so existing callers keep
        working.)
    :return: Tuple ``(image_prompt, cover_path)``. ``cover_path`` is returned
        even when the download failed; in that case the failure is logged and
        the file may not exist, so callers should check before uploading.
    """
    # Build the destination once instead of re-formatting it at every use.
    cover_path = f'{dir}generated_image.jpeg'

    gpt_prompt = f'''
    {titles}
    Use these news titles, generate a description of image formed if I'm using these three titles to create a cover image for a podcast episode. I don't want any text in the image.
    Only output the description.
    Example:
    A cover image with three main sections. On the left, a sleek, futuristic PC with the Microsoft logo and holographic elements showcases AI integration. On the right, an abstract representation of Scarlett Johansson and the OpenAI logo, suggesting a confrontation or debate with stylized silhouettes or digital avatars. At the bottom, a hand holding a smartphone with a facial recognition interface and payment confirmation screen, surrounded by symbols of digital transactions like credit card icons or currency symbols. The background features digital circuits, holographic effects, and vibrant colors.
    '''
    image_prompt = self.ask_gpt(
        input_ask=gpt_prompt,
    )

    response = self.openai_client.images.generate(
        model="dall-e-3",
        prompt=image_prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )

    image_url = response.data[0].url
    logging.info(f"Generated image URL: {image_url}")

    # A bounded timeout keeps a stalled download from hanging the whole
    # pipeline; network errors are reported the same way as a non-200 reply.
    try:
        image_response = requests.get(image_url, timeout=30)
    except requests.RequestException as exc:
        logging.error(f"Failed to download the image: {exc}")
        return image_prompt, cover_path

    if image_response.status_code == 200:
        with open(cover_path, 'wb') as file:
            file.write(image_response.content)
        logging.info("Image downloaded successfully!")
        # Re-encode as JPEG and keep the cover under 1 MB.
        compress_image_to_target_size(cover_path, 1)
    else:
        logging.error(
            f"Failed to download the image (HTTP {image_response.status_code})")

    return image_prompt, cover_path
english_title_case(orchestrator.generate_podcast_title(polished_script)) + + image_prompt, podcast_cover = orchestrator.generate_podcast_cover(podcast_title, output_directory) # Text to Speech for each language, including the original English if PRODUCTION_MODE: for language, cur_script in [('English', polished_script)]: @@ -309,6 +343,7 @@ def remove_leading_numbers(lst): f"Podcast in {language} completed successfully. Audio file at: {audio_file_path}") else: logging.error(f"Failed to generate {language} audio file.") + # Prepare the output text data # output_data = f"Titles:\n{chr(10).join(titles)}\n\ntop_news_prompt: {top_news_prompt}\n\nTop News:\n{chr(10).join(top_news)}\n\nGenerate_scipt_prompt:\n{generate_script_prompt}\n\nScript:\n{script}\n\npolished_script:\n{polished_script}\n\nPodcast Title:\n{podcast_title}\n\npodcast_description:\n{podcast_description}\n" @@ -321,7 +356,8 @@ def remove_leading_numbers(lst): "Script": script, "Polished Script": polished_script, "Podcast Title": podcast_title, - "Podcast Description": podcast_description + "Podcast Description": podcast_description, + "Image Prompt": image_prompt, } # Define the output file path @@ -336,8 +372,8 @@ def remove_leading_numbers(lst): file_path = f"{output_directory}English_final_podcast.mp3" - print(publish_unix) - upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, file_path, podcast_title, podcast_description, STATUS, TYPE, episode_prefix, publish_unix) + # print(publish_unix) + upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, file_path, podcast_cover, podcast_title,podcast_description, STATUS, TYPE, episode_prefix, publish_unix) else: diff --git a/output/2024-05-22/English_final_podcast.mp3 b/output/2024-05-22/English_final_podcast.mp3 new file mode 100644 index 0000000..08b5ac8 Binary files /dev/null and b/output/2024-05-22/English_final_podcast.mp3 differ diff --git a/output/2024-05-22/generated_image.jpeg b/output/2024-05-22/generated_image.jpeg new file mode 100644 index 
0000000..999ff97 Binary files /dev/null and b/output/2024-05-22/generated_image.jpeg differ diff --git a/test.py b/test.py index eaaab44..aa7fbe4 100644 --- a/test.py +++ b/test.py @@ -1,27 +1,11 @@ -from newsScraper import scrape_cnbctech, is_today, scrape_verge, scrape_techcrunch -from datetime import datetime +import openai import requests -from bs4 import BeautifulSoup -import re -import datetime -from utils.utils import get_day_of_week, get_next_weekday, get_upload_date, spanish_title_case, english_title_case +import base64 -today = datetime.date.today() +from openai import OpenAI +from dotenv import load_dotenv +import os +from utils.utils import spanish_title_case, english_title_case -def test_scrape_cnbctech(): - url = 'https://techcrunch.com/' - response = requests.get(url) - soup = BeautifulSoup(response.text, 'html.parser') - data_links = soup.find_all('a', attrs={'data-destinationlink': True}) - - # Debugging: print the number of data_links found - print(f"Number of data links found: {len(data_links)}") - - # Extract the href attributes and text of these links - articles = [[link.text.strip(), link['href']] for link in data_links if is_today(link['href'], current_dat) and len(link.text.strip()) > 0] - print(articles) - return articles - - -print(get_upload_date('2024-05-21')) \ No newline at end of file +print(english_title_case("🔜 Nvidia’s new annual ai chip rollout 🧠, Amazon alexa’s ai overhaul with subscription 🎙️, microsoft ties executive pay to cybersecurity 💼")) \ No newline at end of file diff --git a/utils/uploadPodbean.py b/utils/uploadPodbean.py index 8a97383..286252b 100644 --- a/utils/uploadPodbean.py +++ b/utils/uploadPodbean.py @@ -12,9 +12,11 @@ CONTENT = "Time you enjoy wasting, wdsfas not wasted." 
# description STATUS = "draft" TYPE = "public" -FILE_NAME = "output/2024-05-16/English_final_podcast.mp3" +PODCAST_FILE_NAME = "output/2024-05-16/English_final_podcast.mp3" +PODCAST_COVER_ART = "assets/sample.jpg" -def upload_podcast_episode(client_id, client_secret, file_name, title, content, status, type_, episode_number=None, publish_timestamp=None): + +def upload_podcast_episode(client_id, client_secret, podcast_file_name, podcast_cover_art, title, content, status, type_, episode_number=None, publish_timestamp=None): # Step 1: Obtain OAuth token def get_oauth_token(client_id, client_secret): url = 'https://api.podbean.com/v1/oauth/token' @@ -34,7 +36,7 @@ def get_file_size(filename): return os.path.getsize(filename) # Step 3: Get upload authorization and presigned URL - def get_upload_authorization(access_token, filename): + def get_upload_authorization(access_token, filename, content_type='audio/mpeg'): url = 'https://api.podbean.com/v1/files/uploadAuthorize' absolute_path = os.path.abspath(filename) @@ -49,7 +51,7 @@ def get_upload_authorization(access_token, filename): 'access_token': access_token, 'filename': os.path.basename(absolute_path), 'filesize': filesize, - 'content_type': 'audio/mpeg' + 'content_type': content_type } headers = {'User-Agent': USER_AGENT} @@ -64,19 +66,22 @@ def get_upload_authorization(access_token, filename): return None, None # Step 4: Upload file using presigned URL - def upload_file_to_presigned_url(presigned_url, filename): - headers = {'Content-Type': 'audio/mpeg'} + def upload_file_to_presigned_url(presigned_url, filename, content_type='audio/mpeg'): + if presigned_url is None: + print("Presigned URL is missing.") + return False + headers = {'Content-Type': content_type} with open(filename, 'rb') as f: response = requests.put(presigned_url, data=f, headers=headers) if response.status_code == 200: - print("File successfully uploaded.") + print(f"{content_type}: File successfully uploaded.") return True else: - print("Failed to 
upload file:", response.status_code, response.text) + print(f"{content_type}: Failed to upload file:", response.status_code, response.text) return False # Step 5: Publish episode - def publish_episode(access_token, title, content, status, type_, media_key, episode_number=None, publish_timestamp=None): + def publish_episode(access_token, title, content, status, type_, media_key, logo_key, episode_number=None, publish_timestamp=None): url = 'https://api.podbean.com/v1/episodes' headers = { 'User-Agent': USER_AGENT, @@ -88,6 +93,7 @@ def publish_episode(access_token, title, content, status, type_, media_key, epis 'status': status, 'type': type_, 'media_key': media_key, + 'logo_key': logo_key, 'episode_number': episode_number, 'publish_timestamp': publish_timestamp } @@ -108,13 +114,20 @@ def publish_episode(access_token, title, content, status, type_, media_key, epis access_token = get_oauth_token(client_id, client_secret) if access_token: - presigned_url, media_key = get_upload_authorization(access_token, file_name) - if presigned_url and media_key: - if upload_file_to_presigned_url(presigned_url, file_name): - publish_episode(access_token, title, content, status, type_, media_key, episode_number, publish_timestamp) - + presigned_url, media_key = get_upload_authorization(access_token, podcast_file_name, content_type='audio/mpeg') + presigned_url2, logo_key = get_upload_authorization(access_token, podcast_cover_art, content_type='image/jpg') + + if presigned_url and media_key and presigned_url2 and logo_key: + if upload_file_to_presigned_url(presigned_url, podcast_file_name, content_type='audio/mpeg') and upload_file_to_presigned_url(presigned_url2, podcast_cover_art, content_type='image/jpg'): + publish_episode(access_token, title, content, status, type_, media_key,logo_key, episode_number, publish_timestamp) + else: + print("Failed to upload file or get presigned URL.") + else: + print("Failed to upload file or get presigned URL.") + + if __name__ == '__main__': 
def compress_image_to_target_size(input_path, target_size_mb, initial_quality=85, step=5):
    """
    Re-encodes an image as JPEG in place, lowering the quality until the file fits a size budget.

    The file at input_path is overwritten on every attempt. The first attempt uses
    initial_quality; each failed attempt lowers the quality by step. The last attempt
    is the first one whose quality is <= step.

    :param input_path: Path to the image. It is overwritten with JPEG data.
    :param target_size_mb: Maximum allowed file size in MB. Must be positive.
    :param initial_quality: JPEG quality used for the first attempt.
    :param step: Amount the quality is lowered after each failed attempt. Must be positive.
    :raises ValueError: If target_size_mb or step is not positive, or if the file is
        still larger than the target at the lowest quality tried (the file is then
        left encoded at that lowest quality).
    """
    if target_size_mb <= 0:
        raise ValueError("target_size_mb must be positive.")
    if step <= 0:
        # A non-positive step would never lower the quality: endless loop.
        raise ValueError("step must be positive.")

    target_size_bytes = target_size_mb * 1024 * 1024
    quality = initial_quality
    # Modes the JPEG writer accepts as-is; anything else (e.g. RGBA or P from
    # a PNG source) is converted so that save() does not fail.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(input_path) as source:
        # Decode the pixels before the loop starts overwriting the very file
        # they are read from.
        source.load()
        image = source if source.mode in jpeg_modes else source.convert("RGB")

        while True:
            image.save(input_path, 'JPEG', quality=quality)
            if os.path.getsize(input_path) <= target_size_bytes:
                return
            if quality <= step:
                # Lowest quality already tried; give up below.
                break
            quality -= step

    raise ValueError("Cannot compress image to the desired size.")