-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from dangvansam/feature/voice-clone
add clone voice from local file api and cli
- Loading branch information
Showing
9 changed files
with
301 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,5 +53,4 @@ node_modules | |
pretrained-models/* | ||
*_pb2_grpc.py | ||
*_pb2.py | ||
poetry.lock | ||
web | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
[tool.poetry] | ||
name = "viet-tts" | ||
name = "viettts" | ||
version = "0.1.0" | ||
description = "VietTTS: An Open-Source Vietnamese Text to Speech" | ||
authors = ["dangvansam <[email protected]>"] | ||
|
@@ -8,17 +8,14 @@ readme = "README.md" | |
[tool.poetry.dependencies] | ||
python = "^3.10" | ||
conformer = "0.3.2" | ||
deepspeed = "0.14.2" | ||
diffusers = "0.27.2" | ||
gradio = "4.32.2" | ||
hydra-core = "1.3.2" | ||
hyperpyyaml = "1.2.2" | ||
librosa = "0.10.2" | ||
networkx = "3.1" | ||
omegaconf = "2.3.0" | ||
onnx = "1.16.0" | ||
onnxruntime-gpu = "1.16.0" | ||
openai-whisper = "20231117" | ||
protobuf = "4.25" | ||
pydantic = "2.7.0" | ||
soundfile = "0.12.1" | ||
|
@@ -29,15 +26,16 @@ wget = "3.2" | |
fastapi = "0.111.0" | ||
fastapi-cli = "0.0.4" | ||
loguru = "0.7.2" | ||
natsort = "8.4.0" | ||
vinorm = "^2.0.7" | ||
huggingface-hub = "0.24.7" | ||
click = "^8.1.7" | ||
gunicorn = "^23.0.0" | ||
silero-vad = "^5.1.2" | ||
tiktoken = "^0.8.0" | ||
openai-whisper = "^20240930" | ||
|
||
[tool.poetry.scripts] | ||
viet-tts = "viettts.cli:cli" | ||
viettts = "viettts.cli:cli" | ||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,9 +14,9 @@ | |
MODEL_DIR = 'pretrained-models' | ||
|
||
@click.command('server') | ||
@click.option('-h', '--host', type=str, default='0.0.0.0') | ||
@click.option('-p', '--port', type=int, default=8298) | ||
@click.option('-w', '--workers', type=int, default=1) | ||
@click.option('-h', '--host', type=str, default='0.0.0.0', help="The host address to bind the server to. Default is '0.0.0.0'.") | ||
@click.option('-p', '--port', type=int, default=8298, help="The port number to bind the server to. Default is 8298.") | ||
@click.option('-w', '--workers', type=int, default=1, help="The number of worker processes to handle requests. Default is 1.") | ||
def start_server(host: str, port: int, workers: int): | ||
"""Start API server (OpenAI TTS API compatible). | ||
|
@@ -37,17 +37,14 @@ def start_server(host: str, port: int, workers: int): | |
|
||
|
||
@click.command('synthesis') | ||
@click.option('-t', "--text", type=str, required=True) | ||
@click.option('-v', "--voice", type=str, default='1') | ||
@click.option('-s', "--speed", type=float, default=1) | ||
@click.option('-o', "--output", type=str, default='output.wav') | ||
@click.option('-t', "--text", type=str, required=True, help="The input text to synthesize into speech.") | ||
@click.option('-v', "--voice", type=str, default='1', help="The voice ID or file path to clone the voice from. Default is '1'.") | ||
@click.option('-s', "--speed", type=float, default=1, help="The speed multiplier for the speech. Default is 1 (normal speed).") | ||
@click.option('-o', "--output", type=str, default='output.wav', help="The file path to save the synthesized audio. Default is 'output.wav'.") | ||
def synthesis(text: str, voice: str, speed: float, output: str): | ||
"""Synthesize audio from text and save it to a file. | ||
Usage: | ||
viettts synthesis --text 'Xin chào VietTTS' --voice nu-nhe-nhang --output test_nu-nhe-nhang.wav | ||
viettts synthesis --text 'Chào bạn đến với Hà Nội' --voice 8 --speed 1.2 --output test_voice_8_speed_1.2.wav | ||
viettts synthesis --text 'Bạn có thể sao chép giọng sẵn có' --voice Downloads/audio.wav | ||
Usage: viettts synthesis --text 'Xin chào VietTTS' --voice nu-nhe-nhang --speed 1.2 --output test_nu-nhe-nhang.wav | ||
""" | ||
logger.info("Starting synthesis") | ||
st = time.perf_counter() | ||
|
@@ -107,7 +104,8 @@ def cli(): | |
""" | ||
VietTTS CLI v0.1.0 | ||
Vietnamese Text To Speech and Voice Clone - License: Apache 2.0 - Author: <dangvansam [email protected]> | ||
Vietnamese Text To Speech and Voice Clone | ||
License: Apache 2.0 - Author: <dangvansam [email protected]> | ||
""" | ||
pass | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.