Skip to content

Commit

Permalink
updated narrator splitting function
Browse files Browse the repository at this point in the history
Improved over what was before. 
Still more work to do in future though.
  • Loading branch information
erew123 authored Dec 28, 2023
1 parent 8d80151 commit acc4574
Showing 1 changed file with 22 additions and 15 deletions.
37 changes: 22 additions & 15 deletions script.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,9 @@ def voice_preview(string):
return f"[{params['branding']}Server] Audio generation failed. Status: {generate_response.get('status')}"


def replace_asterisk(match):
    """Return the matched text followed by the ``&quot;<*`` marker.

    Written as a replacement callback for ``re.sub``: the text the pattern
    matched is kept unchanged and the literal ``&quot;<*`` is appended.

    Args:
        match: The ``re.Match`` object handed to the callback by ``re.sub``.

    Returns:
        ``match.group(0)`` with ``'&quot;<*'`` appended.
    """
    return match.group(0) + '&quot;<*'

#################################
#### TTS STANDARD GENERATION ####
#################################
Expand All @@ -491,6 +494,7 @@ def output_modifier(string, state):
if not params["activate"]:
return string
original_string = string
print("ORGINAL STRING: ", original_string)
cleaned_string = before_audio_generation(string, params)
if cleaned_string is None:
return
Expand All @@ -508,33 +512,36 @@ def output_modifier(string, state):
.replace("***", "*")
.replace("**", "*")
.replace("\n\n", "\n")
.replace("\n", "")
)
# Clean up two asterisks (narrators) being made between a newline, making it one sentence.
processed_string = re.sub(r'\.\*\n\*', '. ', processed_string)
# Clean up a few other bits.
processed_string = (
processed_string
.replace("&#x27;", "'")
.replace("\n", " ")
# Add special characters to the quote so that we can use it to identify things later, after it's been split
.replace('&quot;', '&quot;<')
# Capture new conversations, which won't have things like &quot; in them
.replace('"', '&quot;<')
)
# Capturing another outlier in the initial character paragraph
#print("processed string 1 is:", processed_string)
processed_string = (processed_string .replace('&quot;\n\n*', '&quot;<*'))
processed_string = (processed_string .replace('&quot;\n', '< '))
processed_string = (processed_string .replace("&#x27;", "'"))
processed_string = (processed_string .replace("\n", " "))
processed_string = (processed_string .replace('&quot;', '&quot;<'))
processed_string = (processed_string .replace('"', '&quot;<'))
#pattern = re.compile(r'(?<=[a-zA-Z!])&quot;<(?=[a-zA-Z])')
pattern = re.compile(r'(?<=[a-zA-Z!?\.])&quot;<(?=[a-zA-Z])')
processed_string = pattern.sub('&quot; ', processed_string)
pattern = re.compile(r'(?<=[a-zA-Z])\*(?=[a-zA-Z])')
processed_string = pattern.sub('*&quot;', processed_string)
processed_string = processed_string.replace('&quot;<. *', '&quot;< *"')
processed_string = processed_string.replace('< *"', '< *')
processed_string = processed_string.replace('. *', '< *')
#print("processed string 2 is:", processed_string)
processed_string = processed_string.replace('! *', '&quot;< *')
processed_string = processed_string.replace('? *', '< *')
processed_string = processed_string.replace('* ', '* &quot;<')
processed_string = processed_string.replace('*. ', '* &quot;<')
processed_string = re.sub(r'\*(?=[a-zA-Z])', replace_asterisk, processed_string)
# Set up a tracking of the individual wav files.
audio_files_all_paragraphs = []
# Split the line on &quot; and ".*" (i.e. ends of sentences), leaving the special characters added to the start of all OTHER sentences, bar possibly the first one if it starts with a *
parts = re.split(r'&quot;|\.\*', processed_string)
audio_files_paragraph = []
for i, part in enumerate(parts):
# Skip parts that are too short
if len(part.strip()) <= 1:
if len(part.strip()) <= 3:
continue
# Figure out which type of line it is, then replace characters as necessary to avoid TTS trying to pronounce them; HTML-unescape afterwards.
# Character will always be a < with a letter immediately after it
Expand Down

0 comments on commit acc4574

Please sign in to comment.