Skip to content

Commit

Permalink
Further Refine Summary Sanitizer
Browse files Browse the repository at this point in the history
- tested against an anidb xml cache
  • Loading branch information
natyusha committed Sep 7, 2024
1 parent e93af30 commit c72c7b7
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions Contents/Code/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,12 +415,13 @@ def image_add(self, meta, images):
# Clean up an AniDB summary string before it is used as metadata.
#
# Steps visible below: optionally drop "info" lines (Source:/Note:/Summary: labels) and
# "misc" attribution lines depending on Prefs['sanitizeSummary'], replace AniDB links with
# their link text, strip BBCode [i] tags, and normalise blank lines and spaces.
#
# summary: the raw summary text; a falsy value skips the `if summary:` branch.
# Returns the sanitized text; a falsy result is replaced with None (see last lines).
#
# NOTE(review): this is a rendered diff hunk without +/- markers and with indentation
# stripped. The first six re.sub statements appear to be the removed (pre-commit) lines and
# the seven that follow their replacements (matches "7 additions and 6 deletions") - confirm
# against the repository before treating this text as the function body.
# NOTE(review): the ur'...' literals are Python 2 only syntax.
def summary_sanitizer(summary):
if summary:
# Both filters below are skipped entirely when the preference is 'Allow Both Types'.
if Prefs['sanitizeSummary'] != 'Allow Both Types':
if Prefs['sanitizeSummary'] != 'Allow Info Lines' : summary = re.sub(r'\b(Sour?ce|Note|Summ?ary):([^\r\n]+|$)', '', summary, flags=re.M|re.S) # Remove the line if it starts with ("Source: ", "Note: ", "Summary: ")
if Prefs['sanitizeSummary'] != 'Allow Misc. Lines' : summary = re.sub(ur'^(\*|\u2014|- translated|~) ([^\r\n]+|$)', '', summary, flags=re.M|re.U) # Remove the line if it starts with ("* ", "— ", "- ", "~ ")
summary = re.sub(r'(?:http:\/\/anidb\.net\/(?:ch|cr|[feat]|(?:character|creator|file|episode|anime|tag)\/)(?:\d+)) \[([^\]]+)]', r'\1', summary) # Replace anidb links with text
summary = re.sub(r'\[i\](.*?)\[\/i\]', '', summary, flags=re.S) # Remove leftover BBCode [i] tags (AniDB API Bug)
summary = re.sub(r'\n\n+', r'\n\n', summary) # Condense stacked empty lines
summary = re.sub(r' +', ' ', summary).strip(' \n') # Remove double spaces and strip spaces and newlines
# Info labels, matched case-insensitively at any word boundary (\b, not only at line start):
# "Source:", "Modified Source:", "Note:", "Note 1:" .. "Note 9:", "Summary:", plus the
# misspellings "Souce:" / "Sumary:". (?!$) skips a label with nothing after the colon on
# that line; otherwise the label and the rest of its line are deleted (the newline is kept).
if Prefs['sanitizeSummary'] != 'Allow Info Lines' : summary = re.sub(r'\b((Modified )?Sour?ce|Note( [1-9])?|Summ?ary):(?!$)([^\r\n]+|$)', '', summary, flags=re.I|re.M) # Remove a "Source: " / "Note: " / "Summary: " style label and the rest of its line
# Misc lines, matched case-insensitively at line start: "* " followed by anything, or
# "— ", "- " or "~ " followed by one of the attribution keywords listed in the pattern.
if Prefs['sanitizeSummary'] != 'Allow Misc. Lines' : summary = re.sub(ur'^(\*|\u2014 (adapted|source:?|summary|translated|written)|- (translated)|~ (adapted|description|summary|translated)) ([^\r\n]+|$)', '', summary, flags=re.I|re.M|re.U) # Remove the line if it starts with "* " or with "— ", "- ", "~ " plus a listed keyword
# "http://anidb.net/<type><id> [label]" becomes "label".
# NOTE(review): only http:// URLs match; https:// links are left untouched - confirm intended.
summary = re.sub(r'(?:http:\/\/anidb\.net\/(?:ch|co|cr|[feast]|(?:character|creator|file|episode|anime|tag)\/)(?:\d+)) \[([^\]]+)]', r'\1', summary) # Replace AniDB links with text
# The negative lookahead keeps [i]...[/i] spans whose content begins with one of the four
# listed phrases; their tags are then stripped by the following statement.
summary = re.sub(r'\[i\](?!"The Sasami|"Stellar|In the distant| occurred in)(.*?)\[\/i\]', '', summary, flags=re.I|re.S) # Remove BBCode [i][/i] tags and their contents (AniDB API Bug)
summary = re.sub(r'(\[i\]|\[\/i\])', '', summary, flags=re.I) # Remove solitary leftover BBCode [i] or [/i] tags (AniDB API Bug)
summary = re.sub(r'\n\n+', r'\n\n', summary) # Condense stacked empty lines
summary = re.sub(r' +', ' ', summary).strip(' \n') # Remove double spaces and strip spaces and newlines
# An empty result is normalised to None.
if not summary: summary = None # For logging purposes
return summary

Expand Down

0 comments on commit c72c7b7

Please sign in to comment.