Skip to content

Commit

Permalink
Add custom art
Browse files Browse the repository at this point in the history
  • Loading branch information
tonychang04 committed May 23, 2024
1 parent bbdb4b4 commit 21d6f64
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 55 deletions.
Binary file added assets/sample.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added generated_image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
68 changes: 52 additions & 16 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
import re
import difflib
from utils.addMusic import add_bgm
from utils.utils import spanish_title_case, english_title_case, get_day_of_week, get_upload_date
from utils.utils import spanish_title_case, english_title_case, get_day_of_week, get_upload_date, compress_image_to_target_size
import sys
from newsLetter.newsletter import send_newsletter, extract_podcast_description, format_newsletter
#from newsLetter.newsletter import send_newsletter, extract_podcast_description, format_newsletter
from utils.uploadPodbean import upload_podcast_episode
import json
import pytz
import requests


# Setup basic configuration for logging
Expand All @@ -29,7 +30,7 @@
RETRY_DELAY = 2 # seconds in case of retries
PRODUCTION_MODE = True # Set to True to enable audio file generation
BGM_PATH = "assets/bgm.mp3"
STATUS = "future" # can change to draft for testing
STATUS = "draft" # can change to draft for testing
TYPE = "public"
pdt = pytz.timezone('America/Los_Angeles')

Expand Down Expand Up @@ -87,13 +88,7 @@ def get_top_news(self):
grouped_sources = scrape_and_group_by_source(self.date)
formatted_text = format_grouped_titles_by_source(grouped_sources)

input_ask = '''Suppose you are the chief editor at CNBC-TechCheck-Briefing. You need to select 5 most important news events to put into today's briefing(You might be able to see some hint by how many times a news event is reported, but also consider what your audience of CNBC-TechCheck-Briefing is interested in). Return the title of the event in order of importance for these unqiue events. Also, exclude these news events talked about yesterday:
microsoft wants to make windows an ai operating system, launches copilot+ pcs
scarlett johansson says openai ripped off her voice after she said the company can't use it
microsoft announces new pcs with ai chips from qualcomm
microsoft surface event: the 6 biggest announcements
in biometric 'breakthrough' year, you may soon start paying with your face
input_ask = '''Suppose you are the chief editor at CNBC-TechCheck-Briefing. You need to select 5 most important news events to put into today's briefing(You might be able to see some hint by how many times a news event is reported, but also consider what your audience of CNBC-TechCheck-Briefing is interested in). Return the title of the event in order of importance for these unqiue events.
Here are the news of today:\n''' + formatted_text
role = "Output the response as string titles in the seperated by newline. Each title should be exactly how it is in the news source."

Expand Down Expand Up @@ -240,7 +235,45 @@ def generate_podcast_title(self, transcript, language=None):
output_response_prompt = f"Output the Title in {language}."
input_ask = "Generate a title for this podcast. Must include three key topics (if there are many, choose the three most important ones). Incorporate emojis where appropriate. Pay attention to capitalization of titles. Follow the style of titles such as: Tesla Showcases FSD Demo 🚗, Adam Neuman's WeWork Bid 💰, CSV Conundrums 🖥️,Anthropic’s $4B Amazon Boost 💰, Brex's Valuation Leap to $12B 💳, Strategies for Success ✨,The OpenAI Voice Revolution 🗣️, AI Safety Measures 🦺, LLMs Go Mobile 📱. Here's the transcript excerpt: " + transcript + "\n" + output_response_prompt + "\nTitle:"
return self.ask_gpt(input_ask)


def generate_podcast_cover(self, titles, dir):
    """Generate podcast cover art from the provided titles.

    The titles are first turned into a text description of an image via
    ``self.ask_gpt``; that description is then used as the DALL-E 3 prompt.
    The resulting image is downloaded to ``generated_image.jpeg`` inside
    ``dir`` and compressed with ``compress_image_to_target_size``.

    :param titles: Text containing the episode's news titles; interpolated
        into the description prompt.
    :param dir: Output directory prefix. It is concatenated directly with
        the file name, so it must already end with a path separator.
    :return: Tuple ``(image_prompt, image_path)``. The path is returned even
        when the download fails, in which case no file is written by this
        call.
    """
    gpt_prompt = f'''
{titles}
Use these news titles, generate a description of image formed if I'm using these three titles to create a cover image for a podcast episode. I don't want any text in the image.
Only output the description.
Example:
A cover image with three main sections. On the left, a sleek, futuristic PC with the Microsoft logo and holographic elements showcases AI integration. On the right, an abstract representation of Scarlett Johansson and the OpenAI logo, suggesting a confrontation or debate with stylized silhouettes or digital avatars. At the bottom, a hand holding a smartphone with a facial recognition interface and payment confirmation screen, surrounded by symbols of digital transactions like credit card icons or currency symbols. The background features digital circuits, holographic effects, and vibrant colors.
'''
    image_prompt = self.ask_gpt(
        input_ask=gpt_prompt,
    )

    response = self.openai_client.images.generate(
        model="dall-e-3",
        prompt=image_prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )

    image_url = response.data[0].url
    print(f"Generated image URL: {image_url}")

    # Build the destination once so the write, the compression step and the
    # return value can never drift apart.
    image_path = f'{dir}generated_image.jpeg'

    try:
        # Without a timeout, requests may block forever on a stalled server.
        image_response = requests.get(image_url, timeout=60)
    except requests.RequestException as exc:
        # Treat transport errors like a non-200 response instead of crashing.
        print(f"Failed to download the image: {exc}")
        return image_prompt, image_path

    # Check if the request was successful
    if image_response.status_code == 200:
        with open(image_path, 'wb') as file:
            file.write(image_response.content)
        print("Image downloaded successfully!")
        compress_image_to_target_size(image_path, 1)
    else:
        print("Failed to download the image")

    return image_prompt, image_path


def remove_leading_numbers(lst):
# This regular expression matches any leading numbers followed by a dot and any amount of whitespace
Expand Down Expand Up @@ -296,8 +329,9 @@ def remove_leading_numbers(lst):
podcast_description = orchestrator.generate_podcast_description(
polished_script)
podcast_title = episode_number + \
english_title_case(
orchestrator.generate_podcast_title(polished_script))
english_title_case(orchestrator.generate_podcast_title(polished_script))

image_prompt, podcast_cover = orchestrator.generate_podcast_cover(podcast_title, output_directory)
# Text to Speech for each language, including the original English
if PRODUCTION_MODE:
for language, cur_script in [('English', polished_script)]:
Expand All @@ -309,6 +343,7 @@ def remove_leading_numbers(lst):
f"Podcast in {language} completed successfully. Audio file at: {audio_file_path}")
else:
logging.error(f"Failed to generate {language} audio file.")


# Prepare the output text data
# output_data = f"Titles:\n{chr(10).join(titles)}\n\ntop_news_prompt: {top_news_prompt}\n\nTop News:\n{chr(10).join(top_news)}\n\nGenerate_scipt_prompt:\n{generate_script_prompt}\n\nScript:\n{script}\n\npolished_script:\n{polished_script}\n\nPodcast Title:\n{podcast_title}\n\npodcast_description:\n{podcast_description}\n"
Expand All @@ -321,7 +356,8 @@ def remove_leading_numbers(lst):
"Script": script,
"Polished Script": polished_script,
"Podcast Title": podcast_title,
"Podcast Description": podcast_description
"Podcast Description": podcast_description,
"Image Prompt": image_prompt,
}

# Define the output file path
Expand All @@ -336,8 +372,8 @@ def remove_leading_numbers(lst):

file_path = f"{output_directory}English_final_podcast.mp3"

print(publish_unix)
upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, file_path, podcast_title, podcast_description, STATUS, TYPE, episode_prefix, publish_unix)
# print(publish_unix)
upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, file_path, podcast_cover, podcast_title,podcast_description, STATUS, TYPE, episode_prefix, publish_unix)


else:
Expand Down
Binary file added output/2024-05-22/English_final_podcast.mp3
Binary file not shown.
Binary file added output/2024-05-22/generated_image.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
30 changes: 7 additions & 23 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,11 @@
from newsScraper import scrape_cnbctech, is_today, scrape_verge, scrape_techcrunch
from datetime import datetime
import openai
import requests
from bs4 import BeautifulSoup
import re
import datetime
from utils.utils import get_day_of_week, get_next_weekday, get_upload_date, spanish_title_case, english_title_case
import base64

today = datetime.date.today()
from openai import OpenAI
from dotenv import load_dotenv
import os
from utils.utils import spanish_title_case, english_title_case


# Ad-hoc scraping check: fetches a page, collects anchors carrying a
# `data-destinationlink` attribute, and returns [text, href] pairs for the
# links that `is_today` accepts and whose text is non-empty.
# NOTE(review): the function name says CNBC but the URL below is TechCrunch — confirm which site is intended.
def test_scrape_cnbctech():
url = 'https://techcrunch.com/'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
data_links = soup.find_all('a', attrs={'data-destinationlink': True})

# Debugging: print the number of data_links found
print(f"Number of data links found: {len(data_links)}")

# Extract the href attributes and text of these links
# NOTE(review): `current_dat` is not defined anywhere in the visible file; presumably the module-level `today` was meant — confirm before running.
articles = [[link.text.strip(), link['href']] for link in data_links if is_today(link['href'], current_dat) and len(link.text.strip()) > 0]
print(articles)
return articles


print(get_upload_date('2024-05-21'))
print(english_title_case("🔜 Nvidia’s new annual ai chip rollout 🧠, Amazon alexa’s ai overhaul with subscription 🎙️, microsoft ties executive pay to cybersecurity 💼"))
43 changes: 28 additions & 15 deletions utils/uploadPodbean.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
CONTENT = "Time you <b>enjoy</b> wasting, wdsfas not wasted." # description
STATUS = "draft"
TYPE = "public"
FILE_NAME = "output/2024-05-16/English_final_podcast.mp3"
PODCAST_FILE_NAME = "output/2024-05-16/English_final_podcast.mp3"
PODCAST_COVER_ART = "assets/sample.jpg"

def upload_podcast_episode(client_id, client_secret, file_name, title, content, status, type_, episode_number=None, publish_timestamp=None):

def upload_podcast_episode(client_id, client_secret, podcast_file_name, podcast_cover_art, title, content, status, type_, episode_number=None, publish_timestamp=None):
# Step 1: Obtain OAuth token
def get_oauth_token(client_id, client_secret):
url = 'https://api.podbean.com/v1/oauth/token'
Expand All @@ -34,7 +36,7 @@ def get_file_size(filename):
return os.path.getsize(filename)

# Step 3: Get upload authorization and presigned URL
def get_upload_authorization(access_token, filename):
def get_upload_authorization(access_token, filename, content_type='audio/mpeg'):
url = 'https://api.podbean.com/v1/files/uploadAuthorize'

absolute_path = os.path.abspath(filename)
Expand All @@ -49,7 +51,7 @@ def get_upload_authorization(access_token, filename):
'access_token': access_token,
'filename': os.path.basename(absolute_path),
'filesize': filesize,
'content_type': 'audio/mpeg'
'content_type': content_type
}
headers = {'User-Agent': USER_AGENT}

Expand All @@ -64,19 +66,22 @@ def get_upload_authorization(access_token, filename):
return None, None

# Step 4: Upload file using presigned URL
def upload_file_to_presigned_url(presigned_url, filename):
headers = {'Content-Type': 'audio/mpeg'}
def upload_file_to_presigned_url(presigned_url, filename, content_type='audio/mpeg'):
if presigned_url is None:
print("Presigned URL is missing.")
return False
headers = {'Content-Type': content_type}
with open(filename, 'rb') as f:
response = requests.put(presigned_url, data=f, headers=headers)
if response.status_code == 200:
print("File successfully uploaded.")
print(f"{content_type}: File successfully uploaded.")
return True
else:
print("Failed to upload file:", response.status_code, response.text)
print(f"{content_type}: Failed to upload file:", response.status_code, response.text)
return False

# Step 5: Publish episode
def publish_episode(access_token, title, content, status, type_, media_key, episode_number=None, publish_timestamp=None):
def publish_episode(access_token, title, content, status, type_, media_key, logo_key, episode_number=None, publish_timestamp=None):
url = 'https://api.podbean.com/v1/episodes'
headers = {
'User-Agent': USER_AGENT,
Expand All @@ -88,6 +93,7 @@ def publish_episode(access_token, title, content, status, type_, media_key, epis
'status': status,
'type': type_,
'media_key': media_key,
'logo_key': logo_key,
'episode_number': episode_number,
'publish_timestamp': publish_timestamp
}
Expand All @@ -108,13 +114,20 @@ def publish_episode(access_token, title, content, status, type_, media_key, epis

access_token = get_oauth_token(client_id, client_secret)
if access_token:
presigned_url, media_key = get_upload_authorization(access_token, file_name)
if presigned_url and media_key:
if upload_file_to_presigned_url(presigned_url, file_name):
publish_episode(access_token, title, content, status, type_, media_key, episode_number, publish_timestamp)

presigned_url, media_key = get_upload_authorization(access_token, podcast_file_name, content_type='audio/mpeg')
presigned_url2, logo_key = get_upload_authorization(access_token, podcast_cover_art, content_type='image/jpg')

if presigned_url and media_key and presigned_url2 and logo_key:
if upload_file_to_presigned_url(presigned_url, podcast_file_name, content_type='audio/mpeg') and upload_file_to_presigned_url(presigned_url2, podcast_cover_art, content_type='image/jpg'):
publish_episode(access_token, title, content, status, type_, media_key,logo_key, episode_number, publish_timestamp)
else:
print("Failed to upload file or get presigned URL.")
else:
print("Failed to upload file or get presigned URL.")


if __name__ == '__main__':
CLIENT_ID = os.getenv("PODBEAN_CLIENT_ID")
CLIENT_SECRET = os.getenv("PODBEAN_CLIENT_SECRET")

upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, FILE_NAME, TITLE, CONTENT, STATUS, TYPE, 24, 1670000000)
upload_podcast_episode(CLIENT_ID, CLIENT_SECRET, PODCAST_FILE_NAME,PODCAST_COVER_ART, TITLE, CONTENT, STATUS, TYPE, 24)
30 changes: 29 additions & 1 deletion utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from datetime import datetime
from datetime import datetime, timedelta
import pytz
from PIL import Image
import os


def get_day_of_week(date):
date_obj = datetime.strptime(date, '%Y-%m-%d')
Expand Down Expand Up @@ -67,4 +70,29 @@ def english_title_case(text):
new_title.append(word.lower())
else:
new_title.append(word.capitalize())
return ' '.join(new_title)
return ' '.join(new_title)


def compress_image_to_target_size(input_path, target_size_mb, initial_quality=85, step=5):
    """
    Re-encode an image as JPEG in place, lowering quality until it fits a size budget.

    The file at ``input_path`` is overwritten on every attempt, starting at
    ``initial_quality`` and decreasing by ``step`` each time the result is
    still larger than the target. The image is always re-encoded at least
    once, even if the input already fits.

    :param input_path: Path to the input image; also the output path.
    :param target_size_mb: Target size in MB.
    :param initial_quality: Initial quality for compression.
    :param step: Step to reduce quality in each iteration; must be positive.
    :raises ValueError: If ``step`` is not positive, or if the image is still
        larger than the target after the lowest quality has been tried.
    """
    if step <= 0:
        # A non-positive step would never lower the quality and could loop forever.
        raise ValueError("step must be a positive number.")

    target_size_bytes = target_size_mb * 1024 * 1024
    quality = initial_quality

    with Image.open(input_path) as img:
        # Read the pixel data up front: the same path is overwritten below.
        img.load()
        # JPEG cannot store alpha or palette images (e.g. RGBA/P PNG input).
        if img.mode not in ("L", "RGB", "CMYK"):
            img = img.convert("RGB")

        while True:
            img.save(input_path, 'JPEG', quality=quality)

            if os.path.getsize(input_path) <= target_size_bytes:
                return

            # Stop once another reduction would take quality to zero or below.
            if quality <= step:
                break

            quality -= step

    # Every quality level was tried and the file is still too large.
    raise ValueError("Cannot compress image to the desired size.")

0 comments on commit 21d6f64

Please sign in to comment.