
Commit 27a7c01

Merge pull request #3 from Inervo:Inervo/issue2
Timer instead of proxy & series-only mode
Inervo authored May 4, 2023
2 parents 1c2ee2f + 95ae1ee commit 27a7c01
Showing 4 changed files with 58 additions and 24 deletions.
22 changes: 12 additions & 10 deletions bedethequeApi.py
@@ -1,17 +1,18 @@
 import re
 import requests
+import time
 from bs4 import BeautifulSoup
 from log import logger

-def find_series_url(comic_series_name, proxy = None) -> str:
+def find_series_url(comic_series_name, proxy = None, wait_delay = None) -> str:
     url = None
     logger.info("No url in komga for serie %s, searching bedetheque by name", comic_series_name)
     if " " in comic_series_name:
         series_to_find = remove_accents(comic_series_name).split(" ")[0]
     else:
         series_to_find = remove_accents(comic_series_name)
     searchurl = f'https://www.bedetheque.com/bandes_dessinees_{series_to_find.lower()}.html'
-    soup = get_soup(searchurl, proxy = proxy)
+    soup = get_soup(searchurl, proxy = proxy, wait_delay = wait_delay)
     list_results = soup.find("div", class_="widget-magazine")
     if not list_results:
         logger.warning("%s not found on bedetheque", comic_series_name)
@@ -42,9 +43,9 @@ def find_series_url(comic_series_name, proxy = None) -> str:
     logger.info("Url found for %s", comic_series_name)
     return url

-def find_comic_url(comic_name, comic_booknumber, serie_url, proxy = None) -> str:
+def find_comic_url(comic_name, comic_booknumber, serie_url, proxy = None, wait_delay = None) -> str:
     logger.info("No url in komga for tome %s, searching bedetheque by name", comic_name)
-    soup = get_soup(serie_url, proxy = proxy)
+    soup = get_soup(serie_url, proxy = proxy, wait_delay = wait_delay)
     if albums := soup.find("div", class_="tab_content_liste_albums"):
         for album in albums.find_all("li"):
             if album.find("label").text.strip().removesuffix(".").lower() == comic_booknumber:
@@ -73,7 +74,7 @@ def get_number_of_albums(soup:BeautifulSoup) -> int:
     total_book_number = soup.find("div", class_="bandeau-info serie").find("i", class_="icon-book").parent.text.strip(' albums')
     return total_book_number

-def get_soup(url: str, proxy = None) -> BeautifulSoup:
+def get_soup(url: str, proxy = None, wait_delay = None) -> BeautifulSoup:
     session = requests.Session()
     session.cookies.update(
         {
@@ -107,8 +108,9 @@ def get_soup(url: str, proxy = None) -> BeautifulSoup:
                 logger.warning("Failed to get page with the current proxy : %s, removing it and trying with the next one", currentProxy)
                 currentProxy = proxy.removeProxyAndGetNew(currentProxy)
     else:
-        logger.warning("Getting soup without proxy")
         page = session.get(url, timeout=5)
+    if wait_delay:
+        time.sleep(wait_delay)
     return BeautifulSoup(page.content, "html.parser")

 def remove_accents(comic_series_name) -> str:
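
This hunk is the heart of the timer mode: after a direct fetch, get_soup sleeps for wait_delay seconds before returning, so successive requests are spaced out instead of being spread across rotating proxies. A minimal sketch of the pattern (get_page_throttled is an illustrative name, not part of the codebase):

    import time
    import requests
    from bs4 import BeautifulSoup

    def get_page_throttled(url, wait_delay=None):
        # Fetch the page, then optionally pause so consecutive requests
        # are rate limited; this mirrors the new get_soup behaviour.
        page = requests.get(url, timeout=5)
        if wait_delay:
            time.sleep(wait_delay)
        return BeautifulSoup(page.content, "html.parser")
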
@@ -133,11 +135,11 @@ def isValidISBN(isbn):
                + sum(int(ch) * 3 for ch in isbn[1::2]))
     return product % 10 == 0

-def get_comic_series_metadata(url: str, proxy = None):
+def get_comic_series_metadata(url: str, proxy = None, wait_delay = None):
     metadata = None
     genres = None

-    soup = get_soup(url, proxy = proxy)
+    soup = get_soup(url, proxy = proxy, wait_delay = wait_delay)
     if not soup.find("div", class_="bandeau-info serie"):
         logger.error("Error reading url %s", url)
         return None
@@ -159,7 +161,7 @@ def get_comic_series_metadata(url: str, proxy = None):
     }
     return metadata

-def get_comic_book_metadata(comic_url: str, proxy = None):
+def get_comic_book_metadata(comic_url: str, proxy = None, wait_delay = None):
     title = ''
     isbn = ''
     releaseDate = ''
@@ -171,7 +173,7 @@ def get_comic_book_metadata(comic_url: str, proxy = None):
     lettrages = []
     couvertures = []

-    soup = get_soup(comic_url, proxy = proxy)
+    soup = get_soup(comic_url, proxy = proxy, wait_delay = wait_delay)
     if not soup.find("meta", attrs={'name': 'description'}):
         logger.error("Error reading url %s", comic_url)
         return None
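With wait_delay threaded through every fetcher, a caller can throttle without any proxy at all. A usage sketch (the URL is illustrative):

    # No proxy: the request is followed by a 7 second pause.
    metadata = get_comic_series_metadata(
        "https://www.bedetheque.com/serie-59-BD-Asterix.html",
        proxy=None,
        wait_delay=7,
    )
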
7 changes: 7 additions & 0 deletions config.template.py
@@ -11,3 +11,10 @@
 # Only one of these 2 fields can be completed. If both are, it will generate an error
 KOMGA_LIBRARY_LIST = [] # retrieve library value from library URL in Komga
 KOMGA_COLLECTION_LIST = [] # retrieve collection value from collection URL in Komga
+
+# Optional. Set this to True to refresh only series (not the books of a series)
+SERIES_ONLY = False
+
+# Optional. Instead of proxies, wait a delay between each request. Default: 7 (seconds)
+WAIT_DELAY = 7
+USE_PROXIES = True
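
As an example, a config.py tuned for the new timer mode (values are illustrative) might end with:

    SERIES_ONLY = True    # refresh series metadata only, skip the per-book pass
    USE_PROXIES = False   # no proxy rotation; rely on the delay instead
    WAIT_DELAY = 10       # seconds between requests; under 5 triggers a confirmation prompt
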
13 changes: 13 additions & 0 deletions env.py
@@ -11,6 +11,19 @@ def __init__(self):
         self.all_series = []
         self.status_to_update = KOMGA_STATUS

+        try:
+            self.series_only = SERIES_ONLY
+        except NameError:
+            self.series_only = False
+        try:
+            self.wait_delay = WAIT_DELAY
+        except NameError:
+            self.wait_delay = 7
+        try:
+            self.use_proxies = USE_PROXIES
+        except NameError:
+            self.use_proxies = True
+
         if KOMGA_LIBRARY_LIST:
             if KOMGA_COLLECTION_LIST or KOMGA_SERIE_LIST:
                 logger.error(
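The try/except NameError blocks keep a config.py written before this commit working: the settings are referenced as bare names (presumably from a star import of config), so a setting the file never defined raises NameError on first use and the default is taken instead. A hypothetical helper that factors out the pattern (config_get is not part of this codebase):

    def config_get(name, default):
        # Look the setting up in this module's globals (populated by
        # "from config import *"); fall back if it was never defined.
        return globals().get(name, default)

    # e.g. self.wait_delay = config_get("WAIT_DELAY", 7)
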
40 changes: 26 additions & 14 deletions refreshMetadata.py
@@ -13,7 +13,18 @@ def refresh_metadata():

     komga = env.komga
     all_series = env.all_series
-    proxy = bedethequeApiProxies()
+    proxy = None
+    if env.use_proxies:
+        proxy = bedethequeApiProxies()
+    else:
+        logger.warning("No-proxy mode requested")
+        if env.wait_delay < 5:
+            chooseToContinue = input("Delay between each request is very low. Are you sure you want to continue (risk of ban)? (Y/N): ")
+            if chooseToContinue.lower() == 'y':
+                logger.warning("Continuing with low delay")
+            else:
+                logger.error("Chose not to continue. Exiting")
+                exit()

     # Loop through each book series
     for serie in all_series:
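
The prompt above gates the risky configuration: with a delay under 5 seconds, the user must explicitly accept the risk of a ban before the run continues. Factored into a standalone function it might look like this (confirm_low_delay is illustrative):

    def confirm_low_delay(delay, threshold=5):
        # Ask for confirmation when the inter-request delay is risky.
        if delay >= threshold:
            return True
        answer = input("Delay between each request is very low. Continue (risk of ban)? (Y/N): ")
        return answer.strip().lower() == "y"
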
@@ -31,27 +42,27 @@ def refresh_metadata():
                 serie_url = link['url']
                 break
         if serie_url is None:
-            serie_url = find_series_url(serie_name, proxy = proxy)
+            serie_url = find_series_url(serie_name, proxy = proxy, wait_delay = env.wait_delay)

         #get the metadata for the series from bedetheque
         if serie_url is None:
             logger.warning("No URL found for %s, skipping metadata refresh for this serie", serie_name)
-            refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy)
+            refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy, wait_delay = env.wait_delay)
             continue
-        bedetheque_metadata = get_comic_series_metadata(serie_url, proxy = proxy)
+        bedetheque_metadata = get_comic_series_metadata(serie_url, proxy = proxy, wait_delay = env.wait_delay)

         #checking we have no issue with the metadata
         if bedetheque_metadata is None:
             logger.warning("Incorrect URL found for %s, trying to look for the URL", serie_name)
-            serie_url = find_series_url(serie_name, proxy = proxy)
+            serie_url = find_series_url(serie_name, proxy = proxy, wait_delay = env.wait_delay)
             if serie_url is None:
                 logger.warning("No URL found for %s, skipping metadata refresh for this serie", serie_name)
-                refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy)
+                refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy, wait_delay = env.wait_delay)
                 continue
-            bedetheque_metadata = get_comic_series_metadata(serie_url, proxy = proxy)
+            bedetheque_metadata = get_comic_series_metadata(serie_url, proxy = proxy, wait_delay = env.wait_delay)
             if bedetheque_metadata is None:
                 logger.error("Error while parsing URL %s found for %s, skipping metadata refresh for this serie", serie_url, serie_name)
-                refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy)
+                refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy, wait_delay = env.wait_delay)
                 continue

         # Prepare the metadata
@@ -80,11 +91,12 @@ def refresh_metadata():
             continue

         # call the refresh on the books of this serie
-        refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy)
+        if not env.series_only:
+            refresh_book_metadata(komga, serie_id, serie_url, proxy = proxy, wait_delay = env.wait_delay)
     logger.info("Refresh successfully terminated")


-def refresh_book_metadata(komga, series_id, serie_url, proxy = None):
+def refresh_book_metadata(komga, series_id, serie_url, proxy = None, wait_delay = None):
     '''
     Refresh Book Metadata
     '''
@@ -107,22 +119,22 @@ def refresh_book_metadata(komga, series_id, serie_url, proxy = None):
         if serie_url is None:
             logger.warning("No URL found for %s, skipping metadata refresh for this book", book_name)
             continue
-        book_url = find_comic_url(book_name, book['metadata']['number'], serie_url, proxy = proxy)
+        book_url = find_comic_url(book_name, book['metadata']['number'], serie_url, proxy = proxy, wait_delay = wait_delay)

         #get the metadata for the series from bedetheque
         if book_url is None:
             logger.warning("No URL found for %s, skipping metadata refresh for this book", book_name)
             continue
-        bedetheque_metadata = get_comic_book_metadata(book_url, proxy = proxy)
+        bedetheque_metadata = get_comic_book_metadata(book_url, proxy = proxy, wait_delay = wait_delay)

         #checking we have no issue with the metadata
         if bedetheque_metadata is None:
             logger.warning("Incorrect URL found for %s, trying to look for the URL", book_name)
-            book_url = find_comic_url(book_name, book['metadata']['number'], serie_url, proxy = proxy)
+            book_url = find_comic_url(book_name, book['metadata']['number'], serie_url, proxy = proxy, wait_delay = wait_delay)
             if book_url is None:
                 logger.warning("No URL found for %s, skipping metadata refresh for this book", book_name)
                 continue
-            bedetheque_metadata = get_comic_book_metadata(book_url, proxy = proxy)
+            bedetheque_metadata = get_comic_book_metadata(book_url, proxy = proxy, wait_delay = wait_delay)
             if bedetheque_metadata is None:
                 logger.error("Error while parsing URL %s found for %s, skipping metadata refresh for this book", book_url, book_name)
                 continue
