Updated UI

Vebrun · Aug 27, 2023 · d27f645 · d27f645
1 parent 1712a6f
commit d27f645
Show file tree

Hide file tree

Showing 9 changed files with 45 additions and 0 deletions.
diff --git a/descriptions.py b/descriptions.py
@@ -23,4 +23,9 @@
 You need to **"Make prompt"** first, and upload the encoded prompt (a `.npz` file)
 """
 
+long_text_md = """
+Very long text is chunked into several sentences, and each sentence is synthesized separately.<br>
+Please make a prompt or use a preset prompt to infer long text.
+"""  # User-facing note: long text is split into sentences and synthesized per sentence; asks the user to supply a prompt or preset.
+
 long_text_example = "Just a few years ago, there were no legions of deep learning scientists developing intelligent products and services at major companies and startups. When we entered the field, machine learning did not command headlines in daily newspapers. Our parents had no idea what machine learning was, let alone why we might prefer it to a career in medicine or law. Machine learning was a blue skies academic discipline whose industrial significance was limited to a narrow set of real-world applications, including speech recognition and computer vision. Moreover, many of these applications required so much domain knowledge that they were often regarded as entirely separate areas for which machine learning was one small component. At that time, neural networks—the predecessors of the deep learning methods that we focus on in this book—were generally regarded as outmoded."
diff --git a/examples.py b/examples.py
@@ -0,0 +1,24 @@
+infer_from_audio_examples = [  # each row fills, in order: textbox, language_dropdown, accent_dropdown, upload_audio_prompt, record_audio_prompt, textbox_transcript (the gr.Examples inputs in launch-ui.py)
+    ["This is how this machine has taken my voice.", 'English', 'no-accent', "prompts/en-2.wav", None, "Wow, look at that! That's no ordinary Teddy bear!"],
+    ["我喜欢抽电子烟，尤其是锐刻五代。", '中文', 'no-accent', "prompts/zh-1.wav", None, "今天我很荣幸，"],
+    ["私の声を真似するのはそんなに面白いですか？", '日本語', 'no-accent', "prompts/ja-2.ogg", None, "初めまして、朝武よしのです。"],
+    ["你可以听得出来我有多困。", '中文', 'no-accent', "prompts/en-1.wav", None, ""],  # this and the next two rows: the prompt file's language prefix differs from the target language, and the transcript is left empty
+    ["この文は、クロスリンガル合成の例です。", '日本語', 'no-accent', "prompts/zh-2.wav", None, ""],
+    ["Actually, I can't speak English, but this machine helped me do it.", 'English', 'no-accent', "prompts/ja-1.wav", None, ""],
+]
+
+make_npz_prompt_examples = [  # each row fills, in order: textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2 (the gr.Examples inputs in launch-ui.py)
+    ["Gem-trader", "prompts/en-2.wav", None, "Wow, look at that! That's no ordinary Teddy bear!"],  # first column is presumably the name given to the generated prompt -- TODO confirm against make_npz_prompt
+    ["Ding Zhen", "prompts/zh-1.wav", None, "今天我很荣幸，"],
+    ["Yoshino", "prompts/ja-2.ogg", None, "初めまして、朝武よしのです。"],
+    ["Sleepy-woman", "prompts/en-1.wav", None, ""],  # the last three rows carry an empty transcript
+    ["Yae", "prompts/zh-2.wav", None, ""],
+    ["Cafe", "prompts/ja-1.wav", None, ""],
+]
+
+infer_from_prompt_examples = [  # each row fills, in order: textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file (the gr.Examples inputs in launch-ui.py)
+    ["A prompt contains voice, prosody and emotion information of a certain speaker.", "English", "no-accent", "vctk_1", None],  # the fifth column (prompt_file) is None in every row; the fourth is the preset_dropdown_3 value
+    ["This prompt is made with an audio of three seconds.", "English", "no-accent", "librispeech_1", None],
+    ["This prompt is made with Chinese speech", "English", "no-accent", "seel", None],
+]
+
diff --git a/launch-ui.py b/launch-ui.py
@@ -34,6 +34,7 @@
 from utils.g2p import PhonemeBpeTokenizer
 from descriptions import *
 from macros import *
+from examples import *
 
 import gradio as gr
 import whisper
@@ -500,6 +501,11 @@ def main():
                     btn_mp.click(make_npz_prompt,
                                 inputs=[textbox_mp, upload_audio_prompt, record_audio_prompt, textbox_transcript],
                                 outputs=[text_output, prompt_output])
+            gr.Examples(examples=infer_from_audio_examples,
+                        inputs=[textbox, language_dropdown, accent_dropdown, upload_audio_prompt, record_audio_prompt, textbox_transcript],
+                        outputs=[text_output, audio_output],
+                        fn=infer_from_audio,
+                        cache_examples=False,)
         with gr.Tab("Make prompt"):
             gr.Markdown(make_prompt_md)
             with gr.Row():
@@ -520,6 +526,11 @@ def main():
                     btn_2.click(make_npz_prompt,
                               inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
                               outputs=[text_output_2, prompt_output_2])
+            gr.Examples(examples=make_npz_prompt_examples,
+                        inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
+                        outputs=[text_output_2, prompt_output_2],
+                        fn=make_npz_prompt,
+                        cache_examples=False,)
         with gr.Tab("Infer from prompt"):
             gr.Markdown(infer_from_prompt_md)
             with gr.Row():
@@ -540,6 +551,11 @@ def main():
                     btn_3.click(infer_from_prompt,
                               inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
                               outputs=[text_output_3, audio_output_3])
+            gr.Examples(examples=infer_from_prompt_examples,
+                        inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
+                        outputs=[text_output_3, audio_output_3],
+                        fn=infer_from_prompt,
+                        cache_examples=False,)
         with gr.Tab("Infer long text"):
             gr.Markdown("This is a long text generation demo. You can use this to generate long audio. ")
             with gr.Row():

diff --git a/prompts/en-1.wav b/prompts/en-1.wav
diff --git a/prompts/en-2.wav b/prompts/en-2.wav
diff --git a/prompts/ja-1.wav b/prompts/ja-1.wav
diff --git a/prompts/ja-2.ogg b/prompts/ja-2.ogg
diff --git a/prompts/zh-1.wav b/prompts/zh-1.wav
diff --git a/prompts/zh-2.wav b/prompts/zh-2.wav