Skip to content

Commit

Permalink
[beets/autotag/hooks] Rework article switching in string distance
Browse files Browse the repository at this point in the history
The new version doesn't rely on regular expressions, provides more
intuitive names, and will probably be easier to maintain.

See: <beetbox#5337 (comment)>
  • Loading branch information
Arav K. committed Sep 8, 2024
1 parent ed52d0b commit 15c8ac3
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions beets/autotag/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,6 @@ def copy(self) -> TrackInfo:
# Candidate distance scoring.

# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
SD_END_REPLACE = re.compile(r"^(.*), (the|a|an)$")
# Reduced weights for certain portions of the string.
SD_PATTERNS = [
(r"^the ", 0.1),
Expand Down Expand Up @@ -311,17 +309,24 @@ def string_dist(str1: Optional[str], str2: Optional[str]) -> float:
if str1 is None or str2 is None:
return 1.0

# Make all following comparisons case-insensitive.
str1 = str1.lower()
str2 = str2.lower()

# Don't penalize strings that move certain words to the end. For
# example, "the something" should be considered equal to
# "something, the".
def replacer(m: re.Match[str]) -> str:
return f"{m.group(2)} {m.group(1)}"
def switch_article(string: str) -> str:
    """Move a trailing ", <article>" suffix back to the front of ``string``.

    If the text following the last ", " in ``string`` is exactly "the",
    "a" or "an", return "<article> <rest>"; for example
    "something, the" becomes "the something". Any other input,
    including one that contains no ", " at all, is returned unchanged.

    The article comparison is exact (case-sensitive), so the input is
    expected to have been lowercased already.
    """
    # rpartition splits on the *last* ", " in a single pass; when the
    # separator is absent it yields ("", "", string), so an empty
    # ``separator`` doubles as the "nothing to switch" signal.
    title, separator, article = string.rpartition(", ")
    if separator and article in ("the", "a", "an"):
        return f"{article} {title}"
    return string

str1 = re.sub(SD_END_REPLACE, replacer, str1)
str2 = re.sub(SD_END_REPLACE, replacer, str2)
str1 = switch_article(str1)
str2 = switch_article(str2)

# Perform a couple of basic normalizing substitutions.
for pat, repl in SD_REPLACE:
Expand Down

0 comments on commit 15c8ac3

Please sign in to comment.