Skip to content

Commit

Permalink
refactor: Python APIのexampleのCLI引数をdataclass化 (#881)
Browse files Browse the repository at this point in the history
次のFIXMEを解消する。

> ```py
>     # FIXME: 流石に多くなってきたので、`dataclass`化する
> ```
  • Loading branch information
qryxip authored Dec 1, 2024
1 parent 32d7aeb commit c61d5db
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 163 deletions.
153 changes: 76 additions & 77 deletions example/python/run-asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,110 +6,109 @@
import logging
from argparse import ArgumentParser
from pathlib import Path
from typing import Tuple

from voicevox_core import AccelerationMode, AudioQuery
from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile


@dataclasses.dataclass
class Args:
    """Command-line options accepted by this example.

    Attributes:
        mode: Acceleration mode ("AUTO", "CPU" or "GPU").
        vvm: Path to the vvm file.
        onnxruntime: Filename of the ONNX Runtime library.
        dict_dir: Open JTalk dictionary directory.
        text: Text to be read aloud.
        out: Path of the wav file to write.
        style_id: Value of the ``--style-id`` option.
    """

    mode: AccelerationMode
    vvm: Path
    onnxruntime: str
    dict_dir: Path
    text: str
    out: Path
    style_id: int

    @staticmethod
    def parse_args() -> "Args":
        """Parse the command line and return the result as an ``Args``."""
        # Each entry is (argument name, keyword options for `add_argument`).
        # The order is kept as-is because it determines the `--help` layout.
        option_table = (
            (
                "--mode",
                {
                    "default": "AUTO",
                    "type": AccelerationMode,
                    "help": 'モード ("AUTO", "CPU", "GPU")',
                },
            ),
            (
                "vvm",
                {
                    "type": Path,
                    "help": "vvmファイルへのパス",
                },
            ),
            (
                "--onnxruntime",
                {
                    "default": Onnxruntime.LIB_VERSIONED_FILENAME,
                    "help": "ONNX Runtimeのライブラリのfilename",
                },
            ),
            (
                "--dict-dir",
                {
                    "default": "./open_jtalk_dic_utf_8-1.11",
                    "type": Path,
                    "help": "Open JTalkの辞書ディレクトリ",
                },
            ),
            (
                "--text",
                {
                    "default": "この音声は、ボイスボックスを使用して、出力されています。",
                    "help": "読み上げさせたい文章",
                },
            ),
            (
                "--out",
                {
                    "default": "./output.wav",
                    "type": Path,
                    "help": "出力wavファイルのパス",
                },
            ),
            (
                "--style-id",
                {
                    "default": 0,
                    "type": int,
                    "help": "話者IDを指定",
                },
            ),
        )

        parser = ArgumentParser()
        for flag, options in option_table:
            parser.add_argument(flag, **options)
        parsed = parser.parse_args()

        return Args(
            mode=parsed.mode,
            vvm=parsed.vvm,
            onnxruntime=parsed.onnxruntime,
            dict_dir=parsed.dict_dir,
            text=parsed.text,
            out=parsed.out,
            style_id=parsed.style_id,
        )


# Example entry point: parses the command line, loads ONNX Runtime and a voice
# model, creates an AudioQuery from the given text, and synthesizes it.
# NOTE(review): this span shows the pre- and post-refactor bodies interleaved
# (tuple unpacking from `parse_args()` next to `Args.parse_args()`, duplicated
# log/synthesis lines) -- confirm which lines are current before running it.
async def main() -> None:
# Emit DEBUG-level logs for this script and for the two voicevox loggers.
logging.basicConfig(format="[%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel("DEBUG")
logging.getLogger("voicevox_core_python_api").setLevel("DEBUG")
logging.getLogger("voicevox_core").setLevel("DEBUG")

# Command-line values, first as a 7-tuple, then as an `Args` instance.
(
acceleration_mode,
vvm_path,
onnxruntime_filename,
open_jtalk_dict_dir,
text,
out,
style_id,
) = parse_args()
args = Args.parse_args()

# Load ONNX Runtime from the requested library filename.
logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})")
onnxruntime = await Onnxruntime.load_once(filename=onnxruntime_filename)
logger.info("%s", f"Loading ONNX Runtime ({args.onnxruntime=})")
onnxruntime = await Onnxruntime.load_once(filename=args.onnxruntime)

logger.debug("%s", f"{onnxruntime.supported_devices()=}")

# Build the synthesizer from the loaded runtime, an Open JTalk instance created
# from the dictionary directory, and the requested acceleration mode.
logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})")
logger.info("%s", f"Initializing ({args.mode=}, {args.dict_dir=})")
synthesizer = Synthesizer(
onnxruntime,
await OpenJtalk.new(open_jtalk_dict_dir),
acceleration_mode=acceleration_mode,
onnxruntime, await OpenJtalk.new(args.dict_dir), acceleration_mode=args.mode
)

logger.debug("%s", f"{synthesizer.metas=}")
logger.debug("%s", f"{synthesizer.is_gpu_mode=}")

# Open the vvm file and load it into the synthesizer.
logger.info("%s", f"Loading `{vvm_path}`")
async with await VoiceModelFile.open(vvm_path) as model:
logger.info("%s", f"Loading `{args.vvm}`")
async with await VoiceModelFile.open(args.vvm) as model:
await synthesizer.load_voice_model(model)

# Create an AudioQuery from the text for the given style ID.
logger.info("%s", f"Creating an AudioQuery from {text!r}")
audio_query = await synthesizer.audio_query(text, style_id)
logger.info("%s", f"Creating an AudioQuery from {args.text!r}")
audio_query = await synthesizer.audio_query(args.text, args.style_id)

# Synthesize from the AudioQuery and write the resulting bytes to the output path.
logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
wav = await synthesizer.synthesis(audio_query, style_id)

out.write_bytes(wav)
logger.info("%s", f"Wrote `{out}`")
wav = await synthesizer.synthesis(audio_query, args.style_id)


def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]:
    """Parse the command line and return the option values as a tuple.

    Returns:
        ``(mode, vvm, onnxruntime, dict_dir, text, out, style_id)``, in that
        order.
    """
    parser = ArgumentParser()

    parser.add_argument(
        "--mode",
        type=AccelerationMode,
        default="AUTO",
        help='モード ("AUTO", "CPU", "GPU")',
    )
    parser.add_argument("vvm", type=Path, help="vvmファイルへのパス")
    parser.add_argument(
        "--onnxruntime",
        default=Onnxruntime.LIB_VERSIONED_FILENAME,
        help="ONNX Runtimeのライブラリのfilename",
    )
    parser.add_argument(
        "--dict-dir",
        type=Path,
        default="./open_jtalk_dic_utf_8-1.11",
        help="Open JTalkの辞書ディレクトリ",
    )
    parser.add_argument(
        "--text",
        default="この音声は、ボイスボックスを使用して、出力されています。",
        help="読み上げさせたい文章",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default="./output.wav",
        help="出力wavファイルのパス",
    )
    parser.add_argument("--style-id", type=int, default=0, help="話者IDを指定")

    parsed = parser.parse_args()
    # FIXME: there are too many values by now; turn this into a `dataclass`
    result = (
        parsed.mode,
        parsed.vvm,
        parsed.onnxruntime,
        parsed.dict_dir,
        parsed.text,
        parsed.out,
        parsed.style_id,
    )
    return result
args.out.write_bytes(wav)
logger.info("%s", f"Wrote `{args.out}`")


def display_as_json(audio_query: AudioQuery) -> str:
Expand Down
Loading

0 comments on commit c61d5db

Please sign in to comment.