Skip to content

Commit

Permalink
validate video urls
Browse files Browse the repository at this point in the history
  • Loading branch information
NotJoeMartinez committed Sep 6, 2024
1 parent 0b715f8 commit f8dfc30
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 4 deletions.
71 changes: 71 additions & 0 deletions yt_fts/summarize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import sys
from urllib.parse import urlparse, parse_qs

from rich.console import Console

# Determine whether input_video is a URL or a video ID.
# If it is a URL, extract the video ID from it.
# Check whether the video ID is in the database.
# If the video ID is in the database, get the full transcript.
# If the video ID is not in the database, download the transcript.
# Feed the transcript to an LLM and print the summary.


class SummarizeHandler:
    """Resolve a user-supplied YouTube video reference to a video ID.

    The handler accepts either a bare video ID or a YouTube URL. URLs are
    validated and reduced to their video ID, which is stored on
    ``self.video_id``. Invalid URLs print an error to the console and
    terminate the process with exit status 1.

    Attributes:
        console: Rich console used for user-facing error output.
        openai_client: Client object supplied by the caller. It is stored
            here but not used by any method in this class yet.
        input_video: The raw value passed by the caller.
        video_id: The resolved video ID.
    """

    # Hosts that serve "/watch?v=<id>" and "/<prefix>/<id>" style URLs.
    _WATCH_HOSTS = frozenset({
        "youtube.com",
        "www.youtube.com",
        "m.youtube.com",
        "music.youtube.com",
    })
    # Hosts whose first path segment is the video ID.
    _SHORT_HOSTS = frozenset({"youtu.be"})
    # Path prefixes on the watch hosts that are followed by the video ID.
    _ID_PATH_PREFIXES = frozenset({"shorts", "embed", "live", "v"})

    def __init__(self, openai_client, input_video):
        """Store the client and resolve ``input_video`` to a video ID.

        Args:
            openai_client: Client object kept for later use.
            input_video: A YouTube URL or a bare video ID.
        """
        self.console = Console()
        self.openai_client = openai_client
        self.input_video = input_video

        candidate = input_video.strip()
        # Video IDs never contain "/" or ".", so either character means the
        # user passed some form of URL (with or without a scheme).
        if "/" in candidate or "." in candidate:
            self.video_id = self.get_video_id_from_url(candidate)
        else:
            self.video_id = candidate

    def summarize_video(self):
        """Summarize the resolved video. Not implemented yet; returns None."""
        # TODO: check whether self.video_id is in the database, fetch or
        # download the transcript, feed it to an LLM and print the summary.
        return None

    def get_video_id_from_url(self, video_url):
        """Extract the video ID from a YouTube URL.

        Supported forms include:
            https://www.youtube.com/watch?v=Xjk6d5fPs_k
            https://www.youtube.com/watch?v=Xjk6d5fPs_k&si=BBb2URutUT2gG4th
            https://youtu.be/Xjk6d5fPs_k
            https://youtu.be/Xjk6d5fPs_k?si=BBb2URutUT2gG4th
            https://www.youtube.com/shorts/Xjk6d5fPs_k
            youtube.com/watch?v=Xjk6d5fPs_k  (scheme omitted)

        Args:
            video_url: The URL to parse.

        Returns:
            The video ID as a string.

        Raises:
            SystemExit: With status 1, after printing an error, when the
                domain is not a supported YouTube host or no video ID can
                be found in the URL.
        """
        video_url = video_url.strip().strip('/')
        if "://" not in video_url:
            # urlparse only fills in the host when a scheme is present.
            video_url = "https://" + video_url

        try:
            parsed = urlparse(video_url)
            # hostname is lower-cased and excludes any port, unlike netloc.
            domain = parsed.hostname or ""
        except ValueError:
            # urlparse rejects some malformed hosts (e.g. bad IPv6 literals).
            self._fail("Invalid URL, please provide a valid YouTube video URL.")

        # Drop empty segments so a trailing or doubled "/" cannot hide the ID.
        segments = [part for part in parsed.path.split('/') if part]
        query = parse_qs(parsed.query)

        if domain not in self._WATCH_HOSTS and domain not in self._SHORT_HOSTS:
            self._fail(f"Invalid URL, domain \"{domain}\" not supported.")

        video_id = None
        if domain in self._SHORT_HOSTS:
            video_id = segments[0] if segments else None
        elif "watch" in segments:
            video_id = query.get('v', [None])[0]
        elif len(segments) >= 2 and segments[0] in self._ID_PATH_PREFIXES:
            video_id = segments[1]

        if not video_id:
            self._fail("Invalid URL, please provide a valid YouTube video URL.")

        return video_id

    def _fail(self, message):
        """Print ``message`` as an error and exit the process with status 1."""
        self.console.print("[red]Error:[/red] " + message)
        sys.exit(1)


35 changes: 31 additions & 4 deletions yt_fts/yt_fts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
import sys
import click

from openai import OpenAI
from rich.console import Console

from .download import DownloadHandler
from .export import ExportHandler
from .search import SearchHandler
from .summarize import SummarizeHandler

from .list import list_channels
from .utils import show_message
from .config import (
Expand Down Expand Up @@ -164,7 +169,6 @@ def delete(channel):
@click.option("-f", "--format", default="txt",
help="The format to export transcripts to. Supported formats: txt, vtt")
def export(channel, format):
from .export import ExportHandler

export_handler = ExportHandler(
scope = "channel",
Expand All @@ -188,7 +192,6 @@ def export(channel, format):
@click.option("-l", "--limit", default=10, type=int, help="Number of results to return")
@click.option("-e", "--export", is_flag=True, help="Export search results to a CSV file.")
def search(text, channel, video_id, export, limit):
from yt_fts.search import SearchHandler

if len(text) > 40:
show_message("search_too_long")
Expand Down Expand Up @@ -229,8 +232,6 @@ def search(text, channel, video_id, export, limit):
help="OpenAI API key. If not provided, the script will attempt to read it from the OPENAI_API_KEY "
"environment variable.")
def vsearch(text, channel, video_id, limit, export, openai_api_key):
from openai import OpenAI
from .search import SearchHandler

if openai_api_key is None:
openai_api_key = os.environ.get("OPENAI_API_KEY")
Expand Down Expand Up @@ -343,6 +344,32 @@ def llm(prompt, channel, openai_api_key=None):
sys.exit(0)


@cli.command(
    name="summarize",
    help="summarize a youtube video"
)
@click.argument("video", required=True)
@click.option("--model", "-m", default="gpt-4o",
              help="Model to use in summary")
@click.option("--openai-api-key", default=None,
              help="OpenAI API key. If not provided, the script will attempt to read it from"
                   " the OPENAI_API_KEY environment variable.")
def summarize(video, model, openai_api_key):
    """Build an OpenAI client and run the summarize handler for ``video``.

    Args:
        video: YouTube URL or video ID given on the command line.
        model: Model name from ``--model``.
        openai_api_key: Key from ``--openai-api-key``; when missing or blank,
            the OPENAI_API_KEY environment variable is used instead.

    Exits with status 1 when no usable API key is available.
    """
    # NOTE(review): `model` is accepted but not forwarded anywhere; the
    # SummarizeHandler call below only takes (openai_client, video).

    # Treat empty or whitespace-only values as missing, so that a blank
    # flag or `OPENAI_API_KEY=""` is reported here instead of being passed
    # to the client.
    openai_api_key = (openai_api_key or "").strip()
    if not openai_api_key:
        openai_api_key = (os.environ.get("OPENAI_API_KEY") or "").strip()

    if not openai_api_key:
        # NOTE(review): relies on a module-level `console` defined outside
        # this view; confirm it exists in this file.
        console.print("[red]Error:[/red] OPENAI_API_KEY environment variable not set\n"
                      "To set the key run: export \"OPENAI_API_KEY=<your_key>\" or pass "
                      "one in with --openai-api-key")
        sys.exit(1)

    openai_client = OpenAI(api_key=openai_api_key)

    summarize_handler = SummarizeHandler(openai_client, video)
    summarize_handler.summarize_video()


@cli.command(
help="""
Show config settings
Expand Down

0 comments on commit f8dfc30

Please sign in to comment.