Skip to content

Commit

Permalink
letterboxd: include reviews
Browse files (browse the repository at this point in the history)
  • Loading branch information
ngalaiko committed Dec 14, 2024
1 parent 7daa9b0 commit 166b3d7
Showing 1 changed file with 7 additions and 13 deletions.
20 changes: 7 additions & 13 deletions scripts/update/letterboxd.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,8 +31,7 @@ def parse_watch_number(href):
watch_number = (
watch_number if watch_number < 10 else 0
) # because movie 1917 exists
data_output = os.path.join(
output, entry["title_slug"], f"{watch_number}.json")
data_output = os.path.join(output, entry["title_slug"], f"{watch_number}.json")

os.makedirs(os.path.dirname(data_output), exist_ok=True)

Expand Down Expand Up @@ -71,7 +70,7 @@ def parse_page(body):
soup = BeautifulSoup(body, "html.parser")
entries = [
parse_entry(entry)
for entry in soup.find_all(attrs={"data-object-name": "entry"})
for entry in soup.find_all(attrs={"data-object-name": ["entry", "review"]})
]
has_next = bool(soup.find("a", class_="next"))
return entries, has_next
Expand All @@ -82,21 +81,17 @@ def parse_entry(entry):
date_str = date_node["href"].split("/")[-4:-1]
date = datetime.strptime("-".join(date_str), "%Y-%m-%d").date()

title = entry.find(
"td", class_="td-film-details").find("h3").find("a").text
title = entry.find("td", class_="td-film-details").find("h3").find("a").text

is_liked = bool(entry.find(
"td", class_="td-like").find(class_="icon-liked"))
is_liked = bool(entry.find("td", class_="td-like").find(class_="icon-liked"))
is_rewatch = bool(
entry.find(
"td", class_="td-rewatch").find(class_="icon-status-off") is None
entry.find("td", class_="td-rewatch").find(class_="icon-status-off") is None
)

details = entry.find("td", class_="td-actions")
href = (
"https://letterboxd.com"
+ entry.find("td",
class_="td-film-details").find("h3").find("a")["href"]
+ entry.find("td", class_="td-film-details").find("h3").find("a")["href"]
)
title_slug = details["data-film-slug"]

Expand Down Expand Up @@ -131,8 +126,7 @@ def get_poster(entry):


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Scrape Letterboxd diary entries.")
parser = argparse.ArgumentParser(description="Scrape Letterboxd diary entries.")
parser.add_argument(
"-o", "--output", help="Output directory", default="./assets/movies"
)
Expand Down

0 comments on commit 166b3d7

Please sign in to comment.