Merge pull request #68 from erew123/dev
Update to Main
erew123 authored Jan 13, 2024
2 parents e43c58a + 5c0f08d commit 180adb4
Showing 10 changed files with 3,354 additions and 95 deletions.
287 changes: 216 additions & 71 deletions README.md

Large diffs are not rendered by default.

503 changes: 503 additions & 0 deletions atsetup.bat

Large diffs are not rendered by default.

440 changes: 440 additions & 0 deletions atsetup.sh

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions diagnostics.py
@@ -215,6 +215,13 @@ def log_system_info():

# Print colored output
print(f"{package_name.ljust(max_package_length)} Required: {color_required}{operator} {required_version.ljust(12)}\033[0m Installed: {color_installed}{installed_version}\033[0m")

print("\nOn Nvidia Graphics cards machines, if your \033[92mInstalled\033[0m version of \033[92mTorch\033[0m and \033[92mTorchaudio\033[0m does")
print("not have \033[92m+cu118\033[0m (Cuda 11.8) or \033[92m+cu121\033[0m (Cuda 12.1) listed after them, you do not have CUDA")
print("installed for Torch or Torchaudio in this Python environment. This will cause you problems")
print("with \033[94mAllTalk\033[0m and \033[94mFinetuning.\033[0m You may have to 'pip install' a new version of torch and")
print("torchaudio, using '\033[94m--upgrade --force-reinstall\033[0m' with the correct version of PyTorch for\033[0m")
print("your Python environment.\033[0m")
print("\033[94m\nRequirements file specifier meanings:\033[0m")
explanation = textwrap.dedent("""
== Exact version != Any version except < Less than
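The new diagnostics text above describes checking whether the installed Torch and Torchaudio builds carry a CUDA tag. A minimal sketch of that check, assuming only that torch and torchaudio are installed in the active Python environment (the reinstall command in the closing comment is illustrative, with cu118 as an example index):

import torch
import torchaudio

def has_cuda_build(version: str) -> bool:
    # CUDA-enabled wheels carry a local version tag such as "2.1.2+cu118" or "2.1.2+cu121".
    return "+cu" in version

print(f"torch      {torch.__version__}   CUDA build: {has_cuda_build(torch.__version__)}")
print(f"torchaudio {torchaudio.__version__}   CUDA build: {has_cuda_build(torchaudio.__version__)}")
print(f"torch.cuda.is_available(): {torch.cuda.is_available()}")

# If both report False on an Nvidia machine, a reinstall along these lines is the usual fix:
#   pip install torch torchaudio --upgrade --force-reinstall --index-url https://download.pytorch.org/whl/cu118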
4 changes: 3 additions & 1 deletion finetune.py
@@ -917,7 +917,9 @@ def refresh():
#### &nbsp;&nbsp;&nbsp;&nbsp; - Place your audio files in <span style="color: #3366ff;">{str(audio_folder)}</span>
#### &nbsp;&nbsp;&nbsp;&nbsp; - Your audio samples can be in the format <span style="color: #3366ff;">mp3, wav,</span> or <span style="color: #3366ff;">flac.</span>
#### &nbsp;&nbsp;&nbsp;&nbsp; - You will need a minimum of <span style="color: #3366ff;">2 minutes</span> of audio in either one or multiple audio files. Very small sample files cause errors, so I would suggest samples of 30 seconds or longer.
-#### &nbsp;&nbsp;&nbsp;&nbsp; - When you have completed Steps 1, 2, and 3, you are welcome to delete your samples from "put-voice-samples-in-here".<br>
+#### &nbsp;&nbsp;&nbsp;&nbsp; - When you have completed Steps 1, 2, and 3, you are welcome to delete your samples from "put-voice-samples-in-here".
+#### &nbsp;&nbsp;&nbsp;&nbsp; - FYI: anecdotal evidence suggests that the Whisper 2 model may yield superior results for audio splitting and dataset creation.
+#### &nbsp;&nbsp;&nbsp;&nbsp; - If this step is failing, it is worth running the diagnostics via atsetup and confirming that cu118 or cu121 is listed against your torch and torchaudio.<br>
### 🟨 <u>What this step is doing</u>
#### &nbsp;&nbsp;&nbsp;&nbsp; - In Step 1 we strip your audio file(s) into smaller files, using Whisper to find spoken words/sentences, and compile that into Excel sheets of training data, ready for finetuning the model in Step 2.
#### &nbsp;&nbsp;&nbsp;&nbsp; - Whilst you can choose multiple Whisper models, it's best to use just one model, as each is about 3GB in size and will download to your local huggingface cache on first-time use. If, when you have completed training, you wish to delete this 3GB model from your system, you are welcome to do so.
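As background to Step 1 described above, here is a rough sketch of Whisper-based segmentation using the openai-whisper package; AllTalk's own dataset pipeline may differ in detail, and the model name and file path here are illustrative:

import whisper

# "large-v2" corresponds to the "Whisper 2" model mentioned above (~3GB download on first use).
model = whisper.load_model("large-v2")
result = model.transcribe("put-voice-samples-in-here/sample1.wav")

for seg in result["segments"]:
    # Each segment carries start/end times in seconds plus the recognised text,
    # which is roughly what ends up in the generated training metadata.
    print(f'{seg["start"]:.2f} -> {seg["end"]:.2f}: {seg["text"].strip()}')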
11 changes: 7 additions & 4 deletions script.py
@@ -237,10 +237,12 @@ def signal_handler(sig, frame):
# Check if the subprocess has started successfully
if process.poll() is None:
print(f"[{params['branding']}Startup] TTS Subprocess starting")
print(f"[{params['branding']}Startup]")
print(
f"[{params['branding']}Startup] Readme available here:",
f"http://{params['ip_address']}:{params['port_number']}",
f"[{params['branding']}Startup] \033[94mSettings & Documentation:\033[00m",
f"\033[92mhttp://{params['ip_address']}:{params['port_number']}\033[00m",
)
print(f"[{params['branding']}Startup]")
else:
print(
f"[{params['branding']}Startup] \033[91mWarning\033[0m TTS Subprocess Webserver failing to start process"
@@ -862,7 +864,8 @@ def ui():
voice.change(lambda x: params.update({"voice": x}), voice, None)
language.change(lambda x: params.update({"language": x}), language, None)

-# TTS Settings (Not yet parsed to api/implemented)
+
+# TS Settings (Not yet parsed to api/implemented)
# local_temperature_gr.change(lambda x: params.update({"local_temperature": x}), local_temperature_gr, None)
# local_repetition_penalty_gr.change(lambda x: params.update({"local_repetition_penalty": x}), local_repetition_penalty_gr, None)

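The wiring above follows Gradio's event pattern: each control's change event pushes its new value into a shared params dict. A hedged, self-contained sketch of that pattern (component names and defaults here are illustrative, not AllTalk's actual settings):

import gradio as gr

params = {"voice": "female_01.wav", "language": "English"}

with gr.Blocks() as demo:
    voice = gr.Textbox(value=params["voice"], label="Voice")
    language = gr.Dropdown(["English", "French"], value=params["language"], label="Language")
    # fn receives the component's new value; outputs is None because nothing is rendered back.
    voice.change(lambda x: params.update({"voice": x}), voice, None)
    language.change(lambda x: params.update({"language": x}), language, None)

# demo.launch()  # uncomment to serve the UI locally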
@@ -885,4 +888,4 @@ def ui():
try:
time.sleep(1) # Add a small delay to avoid high CPU usage
except KeyboardInterrupt:
-break # Allow graceful exit on Ctrl+C
+break # Allow graceful exit on Ctrl+C
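Taken together, the script.py changes above sit inside a common pattern: launch the TTS engine as a subprocess, confirm it started with poll(), idle with a small sleep, and exit cleanly on Ctrl+C. A hedged sketch of that pattern (the child command is a hypothetical entry point, not AllTalk's actual one):

import subprocess
import sys
import time

process = subprocess.Popen([sys.executable, "tts_server.py"])  # hypothetical entry point
time.sleep(2)  # give the child a moment to fail fast if it is going to

if process.poll() is None:  # None means the process is still running
    print("[Startup] TTS subprocess running")
else:
    print(f"[Startup] Warning: TTS subprocess exited with code {process.returncode}")

try:
    while process.poll() is None:
        time.sleep(1)  # small delay to avoid high CPU usage
except KeyboardInterrupt:
    process.terminate()  # graceful exit on Ctrl+C
    process.wait()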
