From 86bd6120830232e9f1c0466f5722bd4f8e1c6822 Mon Sep 17 00:00:00 2001
From: Drew Thomasson <126999465+DrewThomasson@users.noreply.github.com>
Date: Thu, 14 Dec 2023 14:49:09 -0500
Subject: [PATCH] Update gui_run.py made chapters separate by detected chapters
 auto if epub

I found that I could extract the exact names of the chapters in an EPUB file, so if the input is an EPUB file the chapter delimiter is now ignored and only the list of detected chapter names is used as chapter delimiters. I also made a small tweak to the file cleanup part of the code: I hadn't noticed that "don't" was coming out as "do n't", so I fixed that.
---
 gui_run.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/gui_run.py b/gui_run.py
index 00bdf8a..01cd902 100644
--- a/gui_run.py
+++ b/gui_run.py
@@ -7,6 +7,36 @@
 import nltk
 nltk.download('averaged_perceptron_tagger')
 
+epub_file_path = ""
+chapters = []
+
+def convert_epub_and_extract_chapters(epub_path):
+    """Return the chapter titles ebook-convert reports for *epub_path*.
+
+    Runs Calibre's ``ebook-convert`` with '/dev/null' as the output path,
+    echoes every log line, and collects the text following each
+    ``Detected chapter: * `` marker, in the order reported.
+    """
+    chapter_pattern = re.compile(r'Detected chapter: \* (.*)')
+    chapter_names = []
+
+    # The context manager closes the pipe and waits for the process even if
+    # reading or printing raises, so no child process or descriptor leaks.
+    # stderr is merged into stdout so all of the tool's output is scanned.
+    # NOTE(review): '/dev/null' is POSIX-only; confirm behaviour on Windows.
+    with subprocess.Popen(['ebook-convert', epub_path, '/dev/null'],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.STDOUT,
+                          universal_newlines=True) as process:
+        for line in process.stdout:
+            print(line, end='')  # echo the conversion log to the console
+            match = chapter_pattern.search(line)
+            if match:
+                chapter_names.append(match.group(1))
+
+    return chapter_names
+
+
 def calibre_installed():
     """Check if Calibre's ebook-convert tool is available."""
     try:
@@ -33,6 +63,7 @@ def convert_with_calibre(file_path, output_format="txt"):
     return output_path
 
 def process_file():
+    global epub_file_path
     file_path = filedialog.askopenfilename(
         title='Select File',
         filetypes=[('Supported Files', 
@@ -40,6 +71,8 @@ def process_file():
                      '*.mobi', '*.odt', '*.pdf', '*.prc', '*.pdb', '*.pml', '*.rb', '*.rtf', '*.snb', 
                      '*.tcr', '*.txt'))]
     )
+    if file_path and file_path.lower().endswith(".epub"):  # extension check, not a substring match
+        epub_file_path = file_path
     
     if not file_path:
         return
@@ -67,9 +100,14 @@ def process_file():
         "model": "big"
     }
 
+
+
+
     booknlp = BookNLP("en", model_params)
     booknlp.process(file_path, output_directory, book_id)
-
+    global chapters
+    if epub_file_path:  # only an EPUB source has chapter titles to extract
+        chapters = convert_epub_and_extract_chapters(epub_file_path)
     print("Success, File processed successfully!")
     
     # Close the GUI
@@ -194,8 +232,6 @@ def main():
 
 
 
-
-
 import pandas as pd
 import re
 import glob
@@ -419,7 +455,7 @@ def process_text(text):
     # Apply the rule to remove spaces before punctuation and other non-alphanumeric characters
     text = re.sub(r' (?=[^a-zA-Z0-9\s])', '', text)
     # Replace " n’t" with "n’t"
-    text = text.replace(" n’t", "n’t").replace("[", "(").replace("]", ")").replace("gon na", "gonna").replace("—————–", "")
+    text = text.replace(" n’t", "n’t").replace("[", "(").replace("]", ")").replace("gon na", "gonna").replace("—————–", "").replace(" n't", "n't")
     return text
 
 def process_file(filename):
@@ -457,7 +493,6 @@ def main():
 
 
 
-
 #this will wipe the computer of any current audio clips from a previous session
 #but itll ask the user first
 import os
@@ -1850,6 +1885,7 @@ def generate_audio(text, audio_id, language, speaker, voice_actor):
     voice_actor = voice_actor
     language = language
     index = int(re.search(r'\d+', audio_id.split('_')[1]).group())
+    global chapters
 
     # Get device
     global multi_voice_model_voice_list1
@@ -1887,7 +1923,17 @@ def generate_audio(text, audio_id, language, speaker, voice_actor):
 
     language_code = language  # Default to 'en' if not found
 
-    if CHAPTER_KEYWORD in text.upper():
+    # Prefer the chapter titles detected from the EPUB; when none were
+    # detected, fall back to the configured chapter delimiter keyword.
+    if chapters:
+        for chapter in chapters:
+            if chapter in text:
+                chapter_num += 1
+                print(f"chapter num: {chapter_num}")
+                print(f"CHAPTER IS: {chapter}")
+                # Like the keyword branch, advance at most once per text.
+                break
+    elif CHAPTER_KEYWORD in text.upper():
         chapter_num += 1
         print(f"chapter num: {chapter_num}")
         print(f"CHAPTER KEYWORD IS: {CHAPTER_KEYWORD}")