Skip to content

Commit

Permalink
Short reviews, link reviews, improved parse logic
Browse files Browse the repository at this point in the history
  • Loading branch information
Potajito committed May 12, 2024
1 parent 2b88d46 commit 1e4cc73
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 24 deletions.
57 changes: 44 additions & 13 deletions bookwyrm.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from configuration import LOGLEVEL, BOOKWYRM_SERVICE
from configuration import TIME_ZONE, DATE_FORMAT_INPUT, DATE_FORMAT_OUTPUT
from classes import Review, BookUser
from classes import is_old_review

if logging.root.level == logging.DEBUG:
install(show_locals=True)
Expand Down Expand Up @@ -103,6 +104,19 @@ def find_book_author(entry: NavigableString) -> str:
except Exception:
return 'Unknown author'

def find_review_url(entry: NavigableString, profile_url: str) -> str:
    """Return the review link found in a feed entry, or the profile URL.

    Searches ``entry`` for the first ``<a>`` tag whose ``href`` matches a
    ``https://bookwyrm.social/user/...`` URL and returns that ``href``.

    Args:
        entry: Object exposing a BeautifulSoup-style ``find()`` method.
        profile_url: Value returned whenever no usable link is found.

    Returns:
        The matching ``href`` when present and non-empty; otherwise
        ``profile_url`` (also when the lookup raises).
    """
    href_pattern = re.compile(r'https://bookwyrm\.social/user/.+')
    try:
        tag = entry.find('a', href=href_pattern)
        review_url = tag['href'] if tag else None
    except Exception:
        # Best effort: a malformed entry must not abort the whole parse.
        return profile_url
    # Always hand back a string: a missing tag or an empty href used to be
    # able to fall through and return None.
    return review_url or profile_url

def find_time_elapsed(entry: NavigableString) -> str:
try:
href_pattern = re.compile(r'https://bookwyrm\.social/user/.+')
Expand All @@ -119,7 +133,8 @@ def find_time_elapsed(entry: NavigableString) -> str:

def fill_review (title: str, score: int, author: str,
url: str, image_url: str, user_url: str,
username: str, user_image_url: str, review_time_stamp: str, review_text: str) -> Review:
username: str, user_image_url: str, review_time_stamp: str,
review_text: str, review_url: str) -> Review:
"""Adds fields to Review class
Args:
Expand All @@ -138,12 +153,14 @@ def fill_review (title: str, score: int, author: str,
"username": username,
"user_image_url": user_image_url,
"review_time_stamp": review_time_stamp,
"review_text": review_text
"review_text": review_text,
"review_url": review_url
}
# log.debug(f"Added review: {current_review}")
return current_review

def parse_user_profile (profile_url: str) -> List[Review]:
def parse_user_profile (user: BookUser) -> List[Review]:
profile_url = user['user_url']
reviews: List[Review] = []
try:
profile_url_domain = urlparse(profile_url).hostname
Expand All @@ -162,6 +179,7 @@ def parse_user_profile (profile_url: str) -> List[Review]:
username = entry.find('span', itemprop='name').text.strip()
book_name = find_book_title(entry)
time_elapsed_str = find_time_elapsed(entry)
review_url = find_review_url(entry, profile_url)
review_time_stamp = convert_elapsed_to_timestamp(time_elapsed_str)
score_in_stars = entry.select_one('.stars .is-sr-only').text.strip()
score = int(re.findall(r'\d+', score_in_stars)[0])
Expand All @@ -181,15 +199,21 @@ def parse_user_profile (profile_url: str) -> List[Review]:

# log.debug(book_url)
break
reviews.append(fill_review(book_name, score, author,
book_url, image_url, profile_url,
username, user_image_url, review_time_stamp, review_text))
review = fill_review(book_name, score, author,
book_url, image_url, profile_url,
username, user_image_url, review_time_stamp,
review_text, review_url)
reviews.append(review)
clean_string = f"{username} rated {book_name} by {author}: {score}"
log.info(clean_string)
if is_old_review(user, review):
log.info(f"Finished checking reviews, found old review")
break
if ' reviewed ' in entry.text:
username = entry.find('span', itemprop='name').text.strip()
book_name = find_book_title(entry)
time_elapsed_str = find_time_elapsed(entry)
review_url = find_review_url(entry, profile_url)
review_time_stamp = convert_elapsed_to_timestamp(time_elapsed_str)
author = find_book_author(entry)

Expand Down Expand Up @@ -223,11 +247,18 @@ def parse_user_profile (profile_url: str) -> List[Review]:

# log.debug(book_url)
break
reviews.append(fill_review(book_name, score, author,
review = fill_review(book_name, score, author,
book_url, image_url, profile_url,
username, user_image_url, review_time_stamp, review_text))
username, user_image_url, review_time_stamp,
review_text, review_url)
reviews.append(review)

clean_string = f"{username} reviewed {book_name} by {author}: {score}\n Review: {review_text}"
log.info(clean_string)
if is_old_review(user, review):
log.info(f"Finished checking reviews, found old review")
break

log.info(f"Found {len(reviews)} reviews")
#log.debug(pprint(reviews))
return reviews
Expand All @@ -242,9 +273,9 @@ def get_users_reviews (users: List[BookUser]) -> List[Review]:
reviews: List[Review] = []
for user in users:
if user['service'] == BOOKWYRM_SERVICE:
user_reviews = parse_user_profile(user['user_url'])
user_reviews = parse_user_profile(user)
reviews = reviews + user_reviews
log.debug(pprint(reviews))
#log.debug(pprint(reviews))
return reviews

def convert_elapsed_to_timestamp(elapsed_time: str) -> str:
Expand Down Expand Up @@ -326,9 +357,9 @@ def convert_elapsed_to_timestamp(elapsed_time: str) -> str:
formatted_timestamp = target_time.strftime(timestamp_format)
return formatted_timestamp

def test_this ():
""" def test_this ():
profile_url = 'https://bookwyrm.social/user/potajito'
log.debug(f' Trying {profile_url}')
log.debug(f' Trying {profile_url}')
parse_user_profile(profile_url)
test_this()
test_this() """
24 changes: 24 additions & 0 deletions classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Review(TypedDict):
username: str
user_image_url: str
review_text: str
review_url: str

class BookUser(TypedDict):
service: int
Expand Down Expand Up @@ -88,6 +89,20 @@ def get_stars (score: int) -> str:
score_star += '★'
return score_star

def is_old_review (user: BookUser, review: Review):
    """Tell whether ``review`` is not newer than the user's last stored review.

    Both timestamps are parsed with ``DATE_FORMAT_OUTPUT`` and compared as
    points in time, so the result does not depend on whether the date
    format happens to sort correctly as plain text.

    Args:
        user: User record; ``user_url`` and ``last_review_ts`` are read.
        review: Review record; ``user_url``, ``title`` and
            ``review_time_stamp`` are read.

    Returns:
        ``True`` when the review belongs to ``user`` and its timestamp is
        not later than ``user["last_review_ts"]``; ``False`` otherwise,
        including when the review belongs to a different user.

    Raises:
        ValueError: If a timestamp does not match ``DATE_FORMAT_OUTPUT``.
    """
    if user["user_url"] != review["user_url"]:
        # Not this user's review: never report it as old (was an implicit None).
        return False

    last_review_ts = datetime.datetime.strptime(user["last_review_ts"], DATE_FORMAT_OUTPUT)
    review_ts = datetime.datetime.strptime(review["review_time_stamp"], DATE_FORMAT_OUTPUT)

    if last_review_ts.timestamp() < review_ts.timestamp():
        log.debug(f'User Review Datetime: {user["last_review_ts"]}')
        log.info(f"New review for {review['title']} by {user['user_url']} on {review['review_time_stamp']}")
        return False

    log.debug(f"Old review: {review['title']}. Stopping loop.")
    return True


def check_new_reviews (reviews: list[Review], data: dict) -> list[Review]:
log.info("Checking for new reviews")
new_reviews = []
Expand All @@ -110,6 +125,15 @@ def check_new_reviews (reviews: list[Review], data: dict) -> list[Review]:
write_to_users_json(data)
return new_reviews

def format_review_text (review: Review, max_length: int = 350) -> str:
    """Build the description text shown for a review.

    The result is the author, a blank line, the (possibly truncated)
    review text prefixed with ``>>> ``, and a markdown link to the review.
    ``review`` is not modified, so calling this repeatedly on the same
    review always yields the same text.

    Args:
        review: Review record; ``author``, ``review_text`` and
            ``review_url`` are read.
        max_length: Maximum number of review-text characters kept before
            ``"..."`` is appended.

    Returns:
        The formatted description string.
    """
    text = review["review_text"]
    if len(text) > max_length:
        text = text[:max_length] + "..."
    return (f"{review['author']}\n\n"
            f">>> {text}\n"
            f"[Ver reseña completa]({review['review_url']})")

def get_data_id_from_user_url(data: dict, user_url: str) -> int:
for i, user in enumerate(data["users"]):
if user["user_url"] == user_url:
Expand Down
25 changes: 22 additions & 3 deletions configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,29 @@

load_dotenv()

LOGLEVEL = logging.DEBUG
#.ENVS

# Read the variable once. An unset or empty LOGLEVEL falls back to INFO
# instead of crashing on int(""); any other value must be a numeric level.
_loglevel_env = os.getenv("LOGLEVEL")
LOGLEVEL = int(_loglevel_env) if _loglevel_env else logging.INFO
### LOGLEVELS

#CRITICAL = 50
#FATAL = CRITICAL
#ERROR = 40
#WARNING = 30
#WARN = WARNING
#INFO = 20
#DEBUG = 10
#NOTSET = 0

DISCORD_TOKEN = os.getenv("DISCORD_TOKEN_ENV")
GUILD_ID = "757271564227182602" # string
CHANNEL_ID = 815716163102179350 # int
GUILD_ID = os.getenv("GUILD_ID") # string
# os.getenv always yields a string, so convert explicitly to keep CHANNEL_ID
# an int as documented; it stays None when the variable is not set.
_channel_id_env = os.getenv("CHANNEL_ID")
CHANNEL_ID = int(_channel_id_env) if _channel_id_env else None # int

#.ENVS

DATA_FOLDER = "data"
USERS_JSON_FILE_NAME = "users.json"
USERS_JSON_FILE_PATH = os.path.join(DATA_FOLDER,USERS_JSON_FILE_NAME)
Expand Down
6 changes: 3 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import configuration
from configuration import LOGLEVEL, DATA_FOLDER, USERS_JSON_FILE_PATH, GOODREADS_SERVICE, BOOKWYRM_SERVICE
from classes import Review, BookUser, check_new_reviews, get_stars, read_json_data
from classes import extract_user_from_url, read_json_data, write_to_users_json
from classes import extract_user_from_url, read_json_data, write_to_users_json, format_review_text
from exceptions import UrlNotValid
from rss_helper import RSSHelper
from rss_helper import DATE_FORMAT_INPUT, DATE_FORMAT_OUTPUT
Expand Down Expand Up @@ -122,8 +122,8 @@ async def timer(self, channel, force_check=False):
if review['review_text'] == "":
embed.description = f"{review['author']}"
else:
embed.description = f"{review['author']}\n\n>>> {review['review_text']}"
log.debug(f"Review sent for user: {review['username']}")
embed.description = format_review_text(review)
log.info(f"Review sent for user: {review['username']}")
await channel.send(embed=embed, mention_author=True)
self.msg_sent = True
reviews = []
Expand Down
16 changes: 12 additions & 4 deletions rss_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from configuration import LOGLEVEL, DATA_FOLDER, USERS_JSON_FILE_PATH, GOODREADS_SERVICE
from configuration import TIME_ZONE, DATE_FORMAT_INPUT, DATE_FORMAT_OUTPUT
import pytz
from classes import Review, BookUser, read_json_data, write_to_users_json
from classes import Review, BookUser, read_json_data, write_to_users_json, is_old_review
import bookwyrm

USERS_JSON_FILE_PATH = "data/users.json"
Expand Down Expand Up @@ -100,6 +100,10 @@ def get_rss_data_goodreads(self, users: List[BookUser]) -> List[Review]:
author_extract = second_href[second_href.find('<a class="authorName"'):]
author = author_extract[author_extract.find(">") + 1: author_extract.find("</a>")]

# Extract Review URL
review_url = entry.link
log.debug(f"Review URL found: {review_url}")

# Extract Score
if star_position != -1:
score = info[star_position - 2: star_position].strip()
Expand Down Expand Up @@ -130,8 +134,7 @@ def get_rss_data_goodreads(self, users: List[BookUser]) -> List[Review]:
user_image_url = get_user_image(user["id"])
except:
user_image_url = "https://i.imgur.com/9pNffkj.png"

reviews.append( {
review = {
"title": title,
"score": int(score),
"author": author,
Expand All @@ -141,9 +144,14 @@ def get_rss_data_goodreads(self, users: List[BookUser]) -> List[Review]:
"username": username,
"user_image_url": user_image_url,
"review_text": review_text,
"review_url": review_url,
"review_time_stamp": review_date_timezoned.strftime(DATE_FORMAT_OUTPUT),
})
}
reviews.append(review)
log.debug(f"Review found from: {username} for: {title}")
if is_old_review(user, review):
log.info(f"Finished checking reviews for user {username}, found old review")
break
except Exception as error:
console.print_exception()
# log.debug(f"Bad entry: {entry}")
Expand Down
1 change: 1 addition & 0 deletions tests_bookwyrm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from rich.logging import RichHandler
from urllib.parse import urlparse, urljoin
import re
from bookwyrm import *
from classes import Review, BookUser

FORMAT = "%(message)s"
Expand Down
4 changes: 3 additions & 1 deletion tests_goodreads.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,6 @@
log.debug("No review text found.")
except Exception as error:
logging.error(traceback.format_exc())
#return []
#return []

print(len("aaa"))

0 comments on commit 1e4cc73

Please sign in to comment.