Skip to content

Commit

Permalink
Updated UI
Browse files Browse the repository at this point in the history
  • Loading branch information
Plachtaa committed Aug 27, 2023
1 parent 1712a6f commit d27f645
Show file tree
Hide file tree
Showing 9 changed files with 45 additions and 0 deletions.
5 changes: 5 additions & 0 deletions descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,9 @@
You need to **"Make prompt"** first, and upload the encoded prompt (a `.npz` file)
"""

# Markdown blurb explaining how long text is handled (split into sentences,
# each synthesized on its own) and that a prompt is required.
# NOTE(review): presumably rendered with gr.Markdown like the other *_md
# strings - confirm against the "Infer long text" tab.
long_text_md = (
    "\n"
    "Very long text is chunked into several sentences, and each sentence is synthesized separately.<br>\n"
    "Please make a prompt or use a preset prompt to infer long text.\n"
)

# Sample multi-sentence English paragraph for the long-text demo.
# Kept as one literal per sentence; adjacent literals are concatenated by the
# parser, so the resulting value is a single line with no newlines.
long_text_example = (
    "Just a few years ago, there were no legions of deep learning scientists developing intelligent products and services at major companies and startups. "
    "When we entered the field, machine learning did not command headlines in daily newspapers. "
    "Our parents had no idea what machine learning was, let alone why we might prefer it to a career in medicine or law. "
    "Machine learning was a blue skies academic discipline whose industrial significance was limited to a narrow set of real-world applications, including speech recognition and computer vision. "
    "Moreover, many of these applications required so much domain knowledge that they were often regarded as entirely separate areas for which machine learning was one small component. "
    "At that time, neural networks—the predecessors of the deep learning methods that we focus on in this book—were generally regarded as outmoded."
)
24 changes: 24 additions & 0 deletions examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Example rows for the "Infer from audio" tab.
# Each row lines up positionally with the gr.Examples `inputs` in launch-ui.py:
#   [textbox, language_dropdown, accent_dropdown,
#    upload_audio_prompt, record_audio_prompt, textbox_transcript]
# The recorded-audio slot is always None; the last three rows leave the
# transcript empty.
infer_from_audio_examples = [
    [
        "This is how this machine has taken my voice.",
        "English",
        "no-accent",
        "prompts/en-2.wav",
        None,
        "Wow, look at that! That's no ordinary Teddy bear!",
    ],
    [
        "我喜欢抽电子烟,尤其是锐刻五代。",
        "中文",
        "no-accent",
        "prompts/zh-1.wav",
        None,
        "今天我很荣幸,",
    ],
    [
        "私の声を真似するのはそんなに面白いですか?",
        "日本語",
        "no-accent",
        "prompts/ja-2.ogg",
        None,
        "初めまして、朝武よしのです。",
    ],
    [
        "你可以听得出来我有多困。",
        "中文",
        "no-accent",
        "prompts/en-1.wav",
        None,
        "",
    ],
    [
        "この文は、クロスリンガル合成の例です。",
        "日本語",
        "no-accent",
        "prompts/zh-2.wav",
        None,
        "",
    ],
    [
        "Actually, I can't speak English, but this machine helped me do it.",
        "English",
        "no-accent",
        "prompts/ja-1.wav",
        None,
        "",
    ],
]

# Example rows for the "Make prompt" tab.
# Each row lines up positionally with the gr.Examples `inputs` in launch-ui.py:
#   [textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2]
# The recorded-audio slot is always None; the last three rows leave the
# transcript empty.
make_npz_prompt_examples = [
    [
        "Gem-trader",
        "prompts/en-2.wav",
        None,
        "Wow, look at that! That's no ordinary Teddy bear!",
    ],
    [
        "Ding Zhen",
        "prompts/zh-1.wav",
        None,
        "今天我很荣幸,",
    ],
    [
        "Yoshino",
        "prompts/ja-2.ogg",
        None,
        "初めまして、朝武よしのです。",
    ],
    [
        "Sleepy-woman",
        "prompts/en-1.wav",
        None,
        "",
    ],
    [
        "Yae",
        "prompts/zh-2.wav",
        None,
        "",
    ],
    [
        "Cafe",
        "prompts/ja-1.wav",
        None,
        "",
    ],
]

# Example rows for the "Infer from prompt" tab.
# Each row lines up positionally with the gr.Examples `inputs` in launch-ui.py:
#   [textbox_3, language_dropdown_3, accent_dropdown_3,
#    preset_dropdown_3, prompt_file]
# The prompt-file slot is always None, so every row relies on the preset name.
infer_from_prompt_examples = [
    [
        "A prompt contains voice, prosody and emotion information of a certain speaker.",
        "English",
        "no-accent",
        "vctk_1",
        None,
    ],
    [
        "This prompt is made with an audio of three seconds.",
        "English",
        "no-accent",
        "librispeech_1",
        None,
    ],
    [
        "This prompt is made with Chinese speech",
        "English",
        "no-accent",
        "seel",
        None,
    ],
]

16 changes: 16 additions & 0 deletions launch-ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from utils.g2p import PhonemeBpeTokenizer
from descriptions import *
from macros import *
from examples import *

import gradio as gr
import whisper
Expand Down Expand Up @@ -500,6 +501,11 @@ def main():
btn_mp.click(make_npz_prompt,
inputs=[textbox_mp, upload_audio_prompt, record_audio_prompt, textbox_transcript],
outputs=[text_output, prompt_output])
gr.Examples(examples=infer_from_audio_examples,
inputs=[textbox, language_dropdown, accent_dropdown, upload_audio_prompt, record_audio_prompt, textbox_transcript],
outputs=[text_output, audio_output],
fn=infer_from_audio,
cache_examples=False,)
with gr.Tab("Make prompt"):
gr.Markdown(make_prompt_md)
with gr.Row():
Expand All @@ -520,6 +526,11 @@ def main():
btn_2.click(make_npz_prompt,
inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
outputs=[text_output_2, prompt_output_2])
gr.Examples(examples=make_npz_prompt_examples,
inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
outputs=[text_output_2, prompt_output_2],
fn=make_npz_prompt,
cache_examples=False,)
with gr.Tab("Infer from prompt"):
gr.Markdown(infer_from_prompt_md)
with gr.Row():
Expand All @@ -540,6 +551,11 @@ def main():
btn_3.click(infer_from_prompt,
inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
outputs=[text_output_3, audio_output_3])
gr.Examples(examples=infer_from_prompt_examples,
inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
outputs=[text_output_3, audio_output_3],
fn=infer_from_prompt,
cache_examples=False,)
with gr.Tab("Infer long text"):
gr.Markdown("This is a long text generation demo. You can use this to generate long audio. ")
with gr.Row():
Expand Down
Binary file added prompts/en-1.wav
Binary file not shown.
Binary file added prompts/en-2.wav
Binary file not shown.
Binary file added prompts/ja-1.wav
Binary file not shown.
Binary file added prompts/ja-2.ogg
Binary file not shown.
Binary file added prompts/zh-1.wav
Binary file not shown.
Binary file added prompts/zh-2.wav
Binary file not shown.

0 comments on commit d27f645

Please sign in to comment.