diff --git a/bookwyrm.py b/bookwyrm.py index 5b8fcfc..4c0c375 100644 --- a/bookwyrm.py +++ b/bookwyrm.py @@ -17,6 +17,7 @@ from configuration import LOGLEVEL, BOOKWYRM_SERVICE from configuration import TIME_ZONE, DATE_FORMAT_INPUT, DATE_FORMAT_OUTPUT from classes import Review, BookUser +from classes import is_old_review if logging.root.level == logging.DEBUG: install(show_locals=True) @@ -103,6 +104,19 @@ def find_book_author(entry: NavigableString) -> str: except Exception: return 'Unknown author' +def find_review_url(entry: NavigableString, profile_url: str) -> str: + try: + href_pattern = re.compile(r'https://bookwyrm\.social/user/.+') + tag: NavigableString = entry.find('a', href=href_pattern) + if tag: + review_url = tag['href'] + if review_url: + return review_url + else: + return profile_url + except Exception: + return profile_url + def find_time_elapsed(entry: NavigableString) -> str: try: href_pattern = re.compile(r'https://bookwyrm\.social/user/.+') @@ -119,7 +133,8 @@ def find_time_elapsed(entry: NavigableString) -> str: def fill_review (title: str, score: int, author: str, url: str, image_url: str, user_url: str, - username: str, user_image_url: str, review_time_stamp: str, review_text: str) -> Review: + username: str, user_image_url: str, review_time_stamp: str, + review_text: str, review_url: str) -> Review: """Adds fields to Review class Args: @@ -138,12 +153,14 @@ def fill_review (title: str, score: int, author: str, "username": username, "user_image_url": user_image_url, "review_time_stamp": review_time_stamp, - "review_text": review_text + "review_text": review_text, + "review_url": review_url } # log.debug(f"Added review: {current_review}") return current_review -def parse_user_profile (profile_url: str) -> List[Review]: +def parse_user_profile (user: BookUser) -> List[Review]: + profile_url = user['user_url'] reviews: List[Review] = [] try: profile_url_domain = urlparse(profile_url).hostname @@ -162,6 +179,7 @@ def parse_user_profile (profile_url: str) -> List[Review]: username = entry.find('span', itemprop='name').text.strip() book_name = find_book_title(entry) time_elapsed_str = find_time_elapsed(entry) + review_url = find_review_url(entry, profile_url) review_time_stamp = convert_elapsed_to_timestamp(time_elapsed_str) score_in_stars = entry.select_one('.stars .is-sr-only').text.strip() score = int(re.findall(r'\d+', score_in_stars)[0]) @@ -181,15 +199,21 @@ def parse_user_profile (profile_url: str) -> List[Review]: # log.debug(book_url) break - reviews.append(fill_review(book_name, score, author, - book_url, image_url, profile_url, - username, user_image_url, review_time_stamp, review_text)) + review = fill_review(book_name, score, author, + book_url, image_url, profile_url, + username, user_image_url, review_time_stamp, + review_text, review_url) + reviews.append(review) clean_string = f"{username} rated {book_name} by {author}: {score}" log.info(clean_string) + if is_old_review(user, review): + log.info(f"Finished checking reviews, found old review") + break if ' reviewed ' in entry.text: username = entry.find('span', itemprop='name').text.strip() book_name = find_book_title(entry) time_elapsed_str = find_time_elapsed(entry) + review_url = find_review_url(entry, profile_url) review_time_stamp = convert_elapsed_to_timestamp(time_elapsed_str) author = find_book_author(entry) @@ -223,11 +247,18 @@ def parse_user_profile (profile_url: str) -> List[Review]: # log.debug(book_url) break - reviews.append(fill_review(book_name, score, author, + review = fill_review(book_name, score, author, book_url, image_url, profile_url, - username, user_image_url, review_time_stamp, review_text)) + username, user_image_url, review_time_stamp, + review_text, review_url) + reviews.append(review) + clean_string = f"{username} reviewed {book_name} by {author}: {score}\n Review: {review_text}" log.info(clean_string) + if is_old_review(user, review): + log.info(f"Finished checking reviews, found old review") + break + log.info(f"Found {len(reviews)} reviews") #log.debug(pprint(reviews)) return reviews @@ -242,9 +273,9 @@ def get_users_reviews (users: List[BookUser]) -> List[Review]: reviews: List[Review] = [] for user in users: if user['service'] == BOOKWYRM_SERVICE: - user_reviews = parse_user_profile(user['user_url']) + user_reviews = parse_user_profile(user) reviews = reviews + user_reviews - log.debug(pprint(reviews)) + #log.debug(pprint(reviews)) return reviews def convert_elapsed_to_timestamp(elapsed_time: str) -> str: @@ -326,9 +357,9 @@ def convert_elapsed_to_timestamp(elapsed_time: str) -> str: formatted_timestamp = target_time.strftime(timestamp_format) return formatted_timestamp -def test_this (): +""" def test_this (): profile_url = 'https://bookwyrm.social/user/potajito' - log.debug(f' Trying {profile_url}') + log.debug(f' Trying {profile_url}') parse_user_profile(profile_url) -test_this() \ No newline at end of file +test_this() """ \ No newline at end of file diff --git a/classes.py b/classes.py index 2b30136..9d4b76e 100644 --- a/classes.py +++ b/classes.py @@ -35,6 +35,7 @@ class Review(TypedDict): username: str user_image_url: str review_text: str + review_url: str class BookUser(TypedDict): service: int @@ -88,6 +89,20 @@ def get_stars (score: int) -> str: score_star += '★' return score_star +def is_old_review (user: BookUser, review: Review): + last_review_ts = datetime.datetime.strptime(user["last_review_ts"], DATE_FORMAT_OUTPUT) + if user["user_url"] == review["user_url"]: + if (last_review_ts.timestamp() < datetime.datetime.strptime(review["review_time_stamp"],DATE_FORMAT_OUTPUT).timestamp()): + #new_reviews.append(review) + log.debug(f'User Review Datetime: {user["last_review_ts"]}') + log.info(f"New review for {review['title']} by {user['user_url']} on {review['review_time_stamp']}") + if user["last_review_ts"] < review["review_time_stamp"]: + return False + else: + log.debug(f"Old review: {review['title']}. Stopping loop.") + return True + + def check_new_reviews (reviews: list[Review], data: dict) -> list[Review]: log.info("Checking for new reviews") new_reviews = [] @@ -110,6 +125,15 @@ def check_new_reviews (reviews: list[Review], data: dict) -> list[Review]: write_to_users_json(data) return new_reviews +def format_review_text (review: Review) -> str: + max_review_lenght = 350 + if len(review["review_text"]) > max_review_lenght: + review["review_text"] = review["review_text"][:max_review_lenght] + "..." + review["review_text"] = (f"{review['author']}\n\n" + f">>> {review['review_text']}\n" + f"[Ver reseña completa]({review['review_url']})") + return review["review_text"] + def get_data_id_from_user_url(data: dict, user_url: str) -> int: for i, user in enumerate(data["users"]): if user["user_url"] == user_url: diff --git a/configuration.py b/configuration.py index 812787b..80ec03d 100644 --- a/configuration.py +++ b/configuration.py @@ -3,10 +3,29 @@ load_dotenv() -LOGLEVEL = logging.DEBUG +#.ENVS + +if os.getenv("LOGLEVEL") is None: + LOGLEVEL=logging.INFO +else: + LOGLEVEL = int(os.getenv("LOGLEVEL")) + ### LOGLEVELS + + #CRITICAL = 50 + #FATAL = CRITICAL + #ERROR = 40 + #WARNING = 30 + #WARN = WARNING + #INFO = 20 + #DEBUG = 10 + #NOTSET = 0 + DISCORD_TOKEN = os.getenv("DISCORD_TOKEN_ENV") -GUILD_ID = "757271564227182602" # string -CHANNEL_ID = 815716163102179350 # int +GUILD_ID = os.getenv("GUILD_ID") # string +CHANNEL_ID = os.getenv("CHANNEL_ID") # int + +#.ENVS + DATA_FOLDER = "data" USERS_JSON_FILE_NAME = "users.json" USERS_JSON_FILE_PATH = os.path.join(DATA_FOLDER,USERS_JSON_FILE_NAME) diff --git a/main.py b/main.py index a1270e3..2865c66 100644 --- a/main.py +++ b/main.py @@ -13,7 +13,7 @@ import configuration from configuration import LOGLEVEL, DATA_FOLDER, USERS_JSON_FILE_PATH, GOODREADS_SERVICE, BOOKWYRM_SERVICE from classes import Review, BookUser, check_new_reviews, get_stars, read_json_data -from classes import extract_user_from_url, read_json_data, write_to_users_json +from classes import extract_user_from_url, read_json_data, write_to_users_json, format_review_text from exceptions import UrlNotValid from rss_helper import RSSHelper from rss_helper import DATE_FORMAT_INPUT, DATE_FORMAT_OUTPUT @@ -122,8 +122,8 @@ async def timer(self, channel, force_check=False): if review['review_text'] == "": embed.description = f"{review['author']}" else: - embed.description = f"{review['author']}\n\n>>> {review['review_text']}" - log.debug(f"Review sent for user: {review['username']}") + embed.description = format_review_text(review) + log.info(f"Review sent for user: {review['username']}") await channel.send(embed=embed, mention_author=True) self.msg_sent = True reviews = [] diff --git a/rss_helper.py b/rss_helper.py index 681eeab..51de633 100644 --- a/rss_helper.py +++ b/rss_helper.py @@ -13,7 +13,7 @@ from configuration import LOGLEVEL, DATA_FOLDER, USERS_JSON_FILE_PATH, GOODREADS_SERVICE from configuration import TIME_ZONE, DATE_FORMAT_INPUT, DATE_FORMAT_OUTPUT import pytz -from classes import Review, BookUser, read_json_data, write_to_users_json +from classes import Review, BookUser, read_json_data, write_to_users_json, is_old_review import bookwyrm USERS_JSON_FILE_PATH = "data/users.json" @@ -100,6 +100,10 @@ def get_rss_data_goodreads(self, users: List[BookUser]) -> List[Review]: author_extract = second_href[second_href.find('") + 1: author_extract.find("")] + # Extract Review URL + review_url = entry.link + log.debug(f"Review URL found: {review_url}") + # Extract Score if star_position != -1: score = info[star_position - 2: star_position].strip() @@ -130,8 +134,7 @@ def get_rss_data_goodreads(self, users: List[BookUser]) -> List[Review]: user_image_url = get_user_image(user["id"]) except: user_image_url = "https://i.imgur.com/9pNffkj.png" - - reviews.append( { + review = { "title": title, "score": int(score), "author": author, @@ -141,9 +144,14 @@ def get_rss_data_goodreads(self, users: List[BookUser]) -> List[Review]: "username": username, "user_image_url": user_image_url, "review_text": review_text, + "review_url": review_url, "review_time_stamp": review_date_timezoned.strftime(DATE_FORMAT_OUTPUT), - }) + } + reviews.append(review) log.debug(f"Review found from: {username} for: {title}") + if is_old_review(user, review): + log.info(f"Finished checking reviews for user {username}, found old review") + break except Exception as error: console.print_exception() # log.debug(f"Bad entry: {entry}") diff --git a/tests_bookwyrm.py b/tests_bookwyrm.py index 23e39d2..e2fe053 100644 --- a/tests_bookwyrm.py +++ b/tests_bookwyrm.py @@ -8,6 +8,7 @@ from rich.logging import RichHandler from urllib.parse import urlparse, urljoin import re +from bookwyrm import * from classes import Review, BookUser FORMAT = "%(message)s" diff --git a/tests_goodreads.py b/tests_goodreads.py index 0e8d030..b208353 100644 --- a/tests_goodreads.py +++ b/tests_goodreads.py @@ -55,4 +55,6 @@ log.debug("No review text found.") except Exception as error: logging.error(traceback.format_exc()) - #return [] \ No newline at end of file + #return [] + +print(len("aaa"))