Skip to content

Commit

Permalink
Merge pull request #144 from bors-ltd/video_date
Browse files Browse the repository at this point in the history
also store and export video (upload) date
  • Loading branch information
NotJoeMartinez authored Jun 25, 2024
2 parents 6861c0c + 32c6572 commit 50a958a
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 19 deletions.
20 changes: 15 additions & 5 deletions yt_fts/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from rich.console import Console
from rich.table import Table

from .utils import show_message
from .utils import show_message, get_date
from .config import get_db_path


Expand All @@ -25,7 +25,8 @@ def make_db(db_path):
"video_id": str,
"video_title": str,
"video_url": str,
"channel_id": str
"channel_id": str,
"video_date": str,
},
pk="video_id",
not_null={"video_title", "video_url"},
Expand Down Expand Up @@ -78,16 +79,16 @@ def add_channel_info(channel_id, channel_name, channel_url):
})


def add_video(channel_id, video_id, video_title, video_url):
def add_video(channel_id, video_id, video_title, video_url, video_date):

conn = sqlite3.connect(get_db_path())
cur = conn.cursor()
existing_video = cur.execute("SELECT * FROM Videos WHERE video_id = ?",
(video_id,)).fetchone()

if existing_video is None:
cur.execute("INSERT INTO Videos (video_id, video_title, video_url, channel_id) VALUES (?, ?, ?, ?)",
(video_id, video_title, video_url, channel_id))
cur.execute("INSERT INTO Videos (video_id, video_title, video_url, video_date, channel_id) VALUES (?, ?, ?, ?, ?)",
(video_id, video_title, video_url, video_date, channel_id))
conn.commit()

else:
Expand Down Expand Up @@ -144,6 +145,15 @@ def get_title_from_db(video_id):
return db.execute(f"SELECT video_title FROM Videos WHERE video_id = ?", [video_id]).fetchone()[0]


def get_metadata_from_db(video_id):
    """Return the ``Videos`` row for ``video_id`` as a dict.

    The row's ``video_date`` value is replaced with the result of
    ``get_date`` before the dict is returned; every other column is
    passed through as read from the database.

    The first result row is taken by index, so a ``video_id`` with no
    matching row presumably surfaces as an ``IndexError`` (assuming
    ``execute_returning_dicts`` returns a list -- confirm against
    sqlite-utils).
    """
    rows = Database(get_db_path()).execute_returning_dicts(
        "SELECT * FROM Videos WHERE video_id = ?",
        [video_id],
    )
    video_row = rows[0]
    # Swap the stored date string for its parsed form.
    video_row["video_date"] = get_date(video_row["video_date"])
    return video_row


def get_channel_name_from_id(channel_id):

db = Database(get_db_path())
Expand Down
6 changes: 3 additions & 3 deletions yt_fts/download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import tempfile
import subprocess, re, os, sqlite3, json

Expand All @@ -8,7 +7,7 @@

from .config import get_db_path
from .db_utils import add_video
from .utils import parse_vtt
from .utils import parse_vtt, get_date
from urllib.parse import urlparse

from rich.progress import track
Expand Down Expand Up @@ -215,9 +214,10 @@ def vtt_to_db(dir_path):
vid_json = json.load(f)

vid_title = vid_json['title']
vid_date = get_date(vid_json['upload_date'])
channel_id = vid_json['channel_id']

add_video(channel_id, vid_id, vid_title, vid_url)
add_video(channel_id, vid_id, vid_title, vid_url, vid_date)

vtt_json = parse_vtt(vtt)

Expand Down
15 changes: 11 additions & 4 deletions yt_fts/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from .db_utils import (
search_channel, search_video, search_all,
get_channel_name_from_video_id, get_title_from_db
get_channel_name_from_video_id, get_metadata_from_db,
)

from .utils import time_to_secs, show_message
Expand Down Expand Up @@ -35,17 +35,24 @@ def export_fts(text, scope, channel_id=None, video_id=None):

with open(file_name, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(['Channel Name','Video Title', 'Quote', 'Time Stamp', 'Link'])
writer.writerow(['Channel Name','Video Title', 'Date', 'Quote', 'Time Stamp', 'Link'])

for quote in res:
video_id = quote["video_id"]
channel_name = get_channel_name_from_video_id(video_id)
video_title = get_title_from_db(video_id)
metadata = get_metadata_from_db(video_id)
time_stamp = quote["start_time"]
subs = quote["text"]
time = time_to_secs(time_stamp)

writer.writerow([channel_name,video_title, subs.strip(), time_stamp, f"https://youtu.be/{video_id}?t={time}"])
writer.writerow([
channel_name,
metadata['video_title'],
metadata['video_date'],
subs.strip(),
time_stamp,
f"https://youtu.be/{video_id}?t={time}"
])

console = Console()

Expand Down
16 changes: 9 additions & 7 deletions yt_fts/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def print_fts_res(res, query):
quote_match["channel_name"] = get_channel_name_from_video_id(video_id)
channel_names.append(quote_match["channel_name"])

quote_match["video_title"] = get_title_from_db(video_id)
quote_match["metadata"] = get_metadata_from_db(video_id)
quote_match["subs"] = bold_query_matches(quote["text"].strip(), query)
quote_match["time_stamp"] = time_stamp
quote_match["video_id"] = video_id
Expand Down Expand Up @@ -94,17 +94,19 @@ def print_fts_res(res, query):
fts_dict = {}
for quote in fts_res:
channel_name = quote["channel_name"]
video_name = quote["video_title"]
metadata = quote["metadata"]
video_name = metadata["video_title"]
video_date = metadata["video_date"]
quote_data = {
"quote": quote["subs"],
"time_stamp": quote["time_stamp"],
"link": quote["link"]
}
if channel_name not in fts_dict:
fts_dict[channel_name] = {}
if video_name not in fts_dict[channel_name]:
fts_dict[channel_name][video_name] = []
fts_dict[channel_name][video_name].append(quote_data)
if (video_name, video_date) not in fts_dict[channel_name]:
fts_dict[channel_name][(video_name, video_date)] = []
fts_dict[channel_name][(video_name, video_date)].append(quote_data)


# Sort the list by the total number of quotes in each channel
Expand All @@ -119,8 +121,8 @@ def print_fts_res(res, query):
video_list = list(videos.items())
video_list.sort(key=lambda x: len(x[1]))

for video_name, quotes in video_list:
console.print(f" [bold][blue]{video_name}[/blue][/bold]")
for (video_name, video_date), quotes in video_list:
console.print(f" [bold][blue]{video_name}[/blue][/bold] ({video_date})")
console.print("")

# Sort the quotes by timestamp
Expand Down
8 changes: 8 additions & 0 deletions yt_fts/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
This is where I'm putting all the functions that don't belong anywhere else
"""
import datetime
import re
import sqlite3

Expand Down Expand Up @@ -91,6 +92,13 @@ def get_time_delta(timestamp1, timestamp2):
return diff


def get_date(date_string):
    """Parse a date string into a ``datetime.date``.

    Two layouts are accepted:

    * ``YYYY-MM-DD`` -- any string containing ``-`` is parsed with
      ``datetime.date.fromisoformat``.
    * ``YYYYMMDD`` -- parsed with ``strptime`` and the ``%Y%m%d`` format.

    Args:
        date_string: The date text, or ``None`` / ``""`` when no date is
            known (the ``video_date`` column is nullable).

    Returns:
        The parsed ``datetime.date``, or ``None`` when ``date_string`` is
        ``None`` or empty.

    Raises:
        ValueError: If a non-empty string matches neither layout.
    """
    # A missing date is a legitimate value; report it as None instead of
    # failing with a TypeError on the membership test below.
    if not date_string:
        return None

    # Python 3.11 would support datetime.date.fromisoformat('YYYYMMDD') directly
    if '-' in date_string:
        return datetime.date.fromisoformat(date_string)
    return datetime.datetime.strptime(date_string, '%Y%m%d').date()


# check if semantic search has been enabled for channel
def check_ss_enabled(channel_id=None):

Expand Down

0 comments on commit 50a958a

Please sign in to comment.