Skip to content

Commit

Permalink
Improve score handling; Reduce limits; Remove unused xpath function
Browse files Browse the repository at this point in the history
  • Loading branch information
djdembeck committed Aug 30, 2021
1 parent 93da952 commit ca10e17
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 121 deletions.
253 changes: 134 additions & 119 deletions Contents/Code/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
from update_tools import UpdateTool
from urls import SiteUrl

VERSION_NO = '2021.08.28.2'
VERSION_NO = '2021.08.29.1'

# Delay used when requesting HTML,
# may be good to have to prevent being banned from the site
REQUEST_DELAY = 10
REQUEST_DELAY = 1

# Starting value for score before deductions are taken.
INITIAL_SCORE = 100
Expand All @@ -23,7 +23,7 @@
# Any score lower than this will be ignored.
IGNORE_SCORE = 45

THREAD_MAX = 20
#THREAD_MAX = 20

# Setup logger
log = Logging()
Expand Down Expand Up @@ -251,6 +251,7 @@ def update(self, metadata, media, lang, force=False):
html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY)
except Exception as e:
log.info(e)

# Instantiate update helper
update_helper = UpdateTool(force, lang, media, metadata, url)

Expand Down Expand Up @@ -356,10 +357,6 @@ def doSearch(self, ctx, url):
# Set append to the returned array from this function
found = self.before_xpath(ctx, found, html)

log.separator(msg='just after new xpath line', log_level="debug")
# Set append to the returned array from this function
found = self.after_xpath(ctx, found, html)

return found

def before_xpath(self, ctx, found, html):
Expand All @@ -380,6 +377,12 @@ def before_xpath(self, ctx, found, html):
)
datetext = re.sub(r'[^0-9\-]', '', datetext)
date = self.getDateFromString(datetext)
language = self.getStringContentFromXPath(
r, (
u'div/div/div/div/div/div/span/ul/li'
'[contains (@class,"languageLabel")]/span'
)
).split()[1]
narrator = self.getStringContentFromXPath(
r, (
u'div/div/div/div/div/div/span/ul/li'
Expand All @@ -405,54 +408,7 @@ def before_xpath(self, ctx, found, html):
{
'author': author,
'date': date,
'narrator': narrator,
'thumb': thumb,
'title': title,
'url': murl,
}
)
return found

def after_xpath(self, ctx, found, html):
for r in html.xpath(
'//div[contains (@class, "adbl-search-result")]'
):
author = self.getStringContentFromXPath(
r, (
'div/div/ul/li/'
'/a[contains (@class,"author-profile-link")][1]'
)
)
date = self.getDateFromString(
self.getStringContentFromXPath(
r, (
u'div/div/ul/li[contains (., "{0}")]'
'/span[2]//text()'
).format(
ctx['REL_DATE']
)
)
)
murl = self.getAnchorUrlFromXPath(
r, 'div/div/div/div/a[1]'
)
narrator = self.getStringContentFromXPath(
r, u'div/div/ul/li[contains (., "{0}")]//a[1]'.format(
ctx['NAR_BY']
)
)
thumb = self.getImageUrlFromXPath(
r, 'div[contains (@class,"adbl-prod-image-sample-cont")]/a/img'
)
title = self.getStringContentFromXPath(
r, 'div/div/div/div/a[1]'
)
log.separator(msg='XPATH SEARCH HIT', log_level="debug")

found.append(
{
'author': author,
'date': date,
'language': language,
'narrator': narrator,
'thumb': thumb,
'title': title,
Expand All @@ -471,73 +427,16 @@ def run_search(self, helper, result):
if not valid_itemId:
continue

title = f['title']
thumb = f['thumb']
date = f['date']
year = ''
author = f['author']
narrator = f['narrator']

if date is not None:
year = date.year

# Make sure this isn't a pre-order listing
if helper.check_if_preorder(date):
continue

# Score the album name
scorebase1 = helper.media.album
scorebase2 = title.encode('utf-8')
album_score = INITIAL_SCORE - Util.LevenshteinDistance(
scorebase1, scorebase2
)
log.debug("Score from album: " + str(album_score))

# Score the author name
if helper.media.artist:
scorebase3 = helper.media.artist
scorebase4 = author
author_score = INITIAL_SCORE - Util.LevenshteinDistance(
scorebase3, scorebase4
)
log.debug("Score from author: " + str(author_score))
# Find the difference in score between name and author
score = (
album_score + author_score
) - INITIAL_SCORE
else:
score = album_score

log.info("Result #" + str(i + 1))
# Log basic metadata
data_to_log = [
{'ID is': valid_itemId},
{'Title is': title},
{'Author is': author},
{'Narrator is': narrator},
{'Date is ': str(date)},
{'Score is': str(score)},
{'Thumb is': thumb},
]
log.metadata(data_to_log, log_level="info")

if score >= IGNORE_SCORE:
info.append(
{
'id': valid_itemId,
'title': title,
'year': year,
'date': date,
'score': score,
'thumb': thumb,
'artist': author
}
)
else:
log.info(
'# Score is below ignore boundary (%s)... Skipping!',
IGNORE_SCORE
)
self.score_result(f, helper, i, info, valid_itemId, year)

# Print separators for easy reading
if i <= len(result):
Expand All @@ -546,6 +445,110 @@ def run_search(self, helper, result):
info = sorted(info, key=lambda inf: inf['score'], reverse=True)
return info

def score_result(self, f, helper, i, info, valid_itemId, year):
    """
    Score one search result and keep it if the score is high enough.

    f: dict for a single search hit; the keys 'author', 'date',
       'language', 'narrator', 'thumb' and 'title' are read.
    helper: search helper, passed through to the score_* methods.
    i: zero-based index of the result, used only for the log line.
    info: list of accepted results; appended to in place.
    valid_itemId: value stored under 'id' for an accepted result.
    year: value stored under 'year' for an accepted result.

    Returns nothing. A result whose score is at least IGNORE_SCORE
    is appended to ``info``; any other result is only logged.
    """
    author = f['author']
    date = f['date']
    language = f['language']
    narrator = f['narrator']
    thumb = f['thumb']
    title = f['title']

    # Each scorer returns the number of points to deduct.
    deductions = [
        # Album name score
        self.score_album(helper, title),
        # Author name score
        self.score_author(author, helper),
        # Library language score
        self.score_language(helper, language),
    ]

    # Builtin sum() isn't available here, so add the deductions up by
    # hand instead of rebinding the name `sum`. A scorer that had
    # nothing to compare may hand back None; treat that as "deduct
    # nothing" rather than failing on int + None.
    total_deduction = 0
    for points in deductions:
        if points is not None:
            total_deduction += points

    # Subtract difference from initial score
    score = INITIAL_SCORE - total_deduction

    log.info("Result #" + str(i + 1))
    # Log basic metadata
    data_to_log = [
        {'ID is': valid_itemId},
        {'Title is': title},
        {'Author is': author},
        {'Narrator is': narrator},
        {'Date is ': str(date)},
        {'Score is': str(score)},
        {'Thumb is': thumb},
    ]
    log.metadata(data_to_log, log_level="info")

    if score >= IGNORE_SCORE:
        info.append(
            {
                'id': valid_itemId,
                'title': title,
                'year': year,
                'date': date,
                'score': score,
                'thumb': thumb,
                'artist': author
            }
        )
    else:
        log.info(
            '# Score is below ignore boundary (%s)... Skipping!',
            IGNORE_SCORE
        )

def score_album(self, helper, title):
    """
    Measure how far the search result title is from the library album.

    Returns the LevenshteinDistance between helper.media.album and the
    UTF-8 encoded result title; the caller deducts it from the score.
    """
    distance = Util.LevenshteinDistance(
        helper.media.album, title.encode('utf-8')
    )
    log.debug("Score from album: " + str(distance))
    return distance

def score_author(self, author, helper):
    """
    Compare the input author similarity to the search result author.
    Score is calculated with LevenshteinDistance.

    author: author name taken from the search result.
    helper: search helper; helper.media.artist is the library artist.

    Returns the number of points to deduct: the Levenshtein distance
    between the two names, or 0 when the media has no artist to
    compare against.
    """
    if not helper.media.artist:
        # Nothing to compare, so deduct nothing. Falling off the end
        # here would return None, which cannot be added to the other
        # scores.
        return 0

    author_score = Util.LevenshteinDistance(
        helper.media.artist, author
    )
    log.debug("Score from author: " + str(author_score))
    return author_score

def score_language(self, helper, language):
    """
    Compare the library language to the search result language
    and knock off 2 points if they don't match.

    helper: search helper; helper.lang is the library language code.
    language: language name taken from the search result.

    Returns 2 when the result language differs from the library
    language, otherwise 0. A library language that has no entry in
    the lookup table cannot be compared, so it is not penalised.
    """
    lang_dict = {
        Locale.Language.English: 'English',
        'de': 'German',
        'fr': 'French',
        'it': 'Italian'
    }

    # .get() instead of indexing: a library language outside the
    # table must not abort the whole search with a KeyError.
    library_language = lang_dict.get(helper.lang)
    if library_language is None:
        log.debug("Library language is not in lookup, not scoring it")
        return 0

    if language != library_language:
        log.debug("Book is not library language, deduct 2 points")
        return 2
    return 0

"""
Update functions that require PMS imports,
thus we cannot 'outsource' them to UpdateTool
Expand Down Expand Up @@ -669,7 +672,9 @@ def handle_series(self, helper, html):
r, '//li[contains(@class, "seriesLabel")]//a[2]'
)

helper.series_def = helper.series2 if helper.series2 else helper.series
helper.series_def = (
helper.series2 if helper.series2 else helper.series
)

helper.volume = self.getStringContentFromXPath(
r, '//li[contains(@class, "seriesLabel")]/text()[2]'
Expand All @@ -682,7 +687,9 @@ def handle_series(self, helper, html):
if helper.volume2 == ",":
helper.volume2 = ""

helper.volume_def = helper.helper.volume2 if helper.volume2 else helper.volume
helper.volume_def = (
helper.helper.volume2 if helper.volume2 else helper.volume
)

# fix series when audible 'forgets' the series link…
if not helper.series_def:
Expand Down Expand Up @@ -718,15 +725,23 @@ def compile_metadata(self, helper):
# Other metadata
helper.metadata.title = helper.title
helper.metadata.title_sort = ' - '.join(
filter(None, [(helper.series_def + helper.volume_def), helper.title])
filter(
None, [(helper.series_def + helper.volume_def), helper.title]
)
)
helper.metadata.studio = helper.studio
helper.metadata.summary = helper.synopsis

if Prefs['cover_options'] == "Use Audible cover":
helper.metadata.posters[1] = Proxy.Media(HTTP.Request(helper.thumb))
if Prefs['cover_options'] == (
"Use Audible cover"
):
helper.metadata.posters[1] = Proxy.Media(
HTTP.Request(helper.thumb)
)
helper.metadata.posters.validate_keys(helper.thumb)
elif Prefs['cover_options'] == "Download cover but don't overwrite existing":
elif Prefs['cover_options'] == (
"Download cover but don't overwrite existing"
):
helper.metadata.posters[helper.thumb] = Proxy.Media(
HTTP.Request(helper.thumb), sort_order=1
)
Expand Down
3 changes: 1 addition & 2 deletions Contents/Code/search_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,5 @@ def validate_author_name(self):
log.info(
"Artist name seems to be bad, "
"not using it in search."

)
)
break

0 comments on commit ca10e17

Please sign in to comment.