Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

整理: `_speaker_info()` を `MetasStore` へ移植 #1261

Merged
merged 23 commits into from
Jun 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e19ce15
refactor: `_speaker_info` を `MetasStore` へ移植
tarepan May 18, 2024
f557d6e
Merge branch 'master' into refactor/metas_spk
tarepan May 22, 2024
d6084f3
Merge branch 'master' into refactor/metas_spk
tarepan May 22, 2024
811a565
fix: コンフリクト
tarepan May 22, 2024
0b557d7
Merge branch 'master' into refactor/metas_spk
tarepan May 25, 2024
2ef7a7d
Merge branch 'master' into refactor/metas_spk
tarepan May 28, 2024
a7ff66c
fix: マージミスを修正
tarepan May 28, 2024
bad2845
fix: lint
tarepan May 28, 2024
b48c08d
Merge branch 'master' into refactor/metas_spk
tarepan May 29, 2024
de713eb
refactor: `.speaker_info()` 引数を変更
tarepan May 29, 2024
f709d7e
fix: キャラクター情報のディレクトリ構造に関する制約を修正
tarepan May 29, 2024
bdd925b
Merge branch 'master' into refactor/metas_spk
tarepan Jun 2, 2024
1c2b2de
Merge branch 'master' into refactor/metas_spk
tarepan Jun 2, 2024
2b11c42
Merge branch 'master' into refactor/metas_spk
tarepan Jun 3, 2024
73d2bf2
Merge branch 'master' into refactor/metas_spk
tarepan Jun 10, 2024
377f737
Merge branch 'master' into refactor/metas_spk
tarepan Jun 18, 2024
5ae8971
Merge branch 'master' into refactor/metas_spk
tarepan Jun 19, 2024
1b37b1a
Merge branch 'master' into refactor/metas_spk
tarepan Jun 21, 2024
66a0928
Merge branch 'master' into refactor/metas_spk
tarepan Jun 23, 2024
7176892
fix: merge
tarepan Jun 23, 2024
1dedb04
fix: merge
tarepan Jun 23, 2024
548afd8
Apply suggestions from code review
Hiroshiba Jun 23, 2024
975937b
不要なimport
Hiroshiba Jun 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions voicevox_engine/app/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ def generate_app(
app = configure_middlewares(app, cors_policy_mode, allow_origin)
app = configure_global_exception_handlers(app)

metas_store = MetasStore(speaker_info_dir)
resource_manager = ResourceManager(is_development())
resource_manager.register_dir(speaker_info_dir)
metas_store = MetasStore(speaker_info_dir, resource_manager)

app.include_router(
generate_tts_pipeline_router(
Expand All @@ -78,9 +78,7 @@ def generate_app(
generate_preset_router(preset_manager, verify_mutability_allowed)
)
app.include_router(
generate_speaker_router(
core_manager, resource_manager, metas_store, speaker_info_dir
)
generate_speaker_router(core_manager, resource_manager, metas_store)
)
if engine_manifest.supported_features.manage_library:
app.include_router(
Expand Down
121 changes: 10 additions & 111 deletions voicevox_engine/app/routers/character.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
"""話者情報機能を提供する API Router"""

from pathlib import Path
from typing import Annotated, Literal, TypeAlias
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import FileResponse
from pydantic.json_schema import SkipJsonSchema

from voicevox_engine.core.core_initializer import CoreManager
from voicevox_engine.metas.Metas import Speaker, SpeakerInfo
from voicevox_engine.metas.MetasStore import MetasStore, filter_characters_and_styles
from voicevox_engine.metas.MetasStore import MetasStore, ResourceFormat
from voicevox_engine.resource_manager import ResourceManager, ResourceManagerError

RESOURCE_ENDPOINT = "_resources"
ResourceFormat: TypeAlias = Literal["base64", "url"]


async def _get_resource_baseurl(request: Request) -> str:
Expand All @@ -24,7 +22,6 @@ def generate_speaker_router(
core_manager: CoreManager,
resource_manager: ResourceManager,
metas_store: MetasStore,
speaker_info_dir: Path,
) -> APIRouter:
"""話者情報 API Router を生成する"""
router = APIRouter(tags=["その他"])
Expand All @@ -47,116 +44,16 @@ def speaker_info(
指定されたspeaker_uuidの話者に関する情報をjson形式で返します。
画像や音声はresource_formatで指定した形式で返されます。
"""
return _speaker_info(
version = core_version or core_manager.latest_version()
core = core_manager.get_core(version)
return metas_store.speaker_info(
speaker_uuid=speaker_uuid,
speaker_or_singer="speaker",
core_version=core_version,
core_characters=core.characters,
resource_baseurl=resource_baseurl,
resource_format=resource_format,
)

# FIXME: この関数をどこかに切り出す
def _speaker_info(
    speaker_uuid: str,
    speaker_or_singer: Literal["speaker", "singer"],
    core_version: str | None,
    resource_baseurl: str,
    resource_format: ResourceFormat,
) -> SpeakerInfo:
    """Build the additional information (policy, portrait, style assets) of one character.

    Parameters
    ----------
    speaker_uuid : str
        UUID of the character to look up.
    speaker_or_singer : Literal["speaker", "singer"]
        Passed to `filter_characters_and_styles` to narrow the candidates.
    core_version : str | None
        Core version to query; the latest version is used when falsy.
    resource_baseurl : str
        Prefix prepended to the resource string when `resource_format` is "url".
    resource_format : ResourceFormat
        "base64" returns the resource string as is, "url" returns
        `{resource_baseurl}/{resource string}`.

    Returns
    -------
    SpeakerInfo
        Policy text, portrait and per-style information of the character.

    Raises
    ------
    HTTPException
        404 when no character matches `speaker_uuid`;
        500 when a required file or resource cannot be found.
    """
    # Character meta information bundled with the engine must follow
    # this directory layout:
    # {root_dir}/
    #   character_info/
    #     {speaker_uuid_0}/
    #       policy.md
    #       portrait.png
    #       icons/
    #         {id_0}.png
    #         {id_1}.png
    #         ...
    #       portraits/
    #         {id_0}.png
    #         {id_1}.png
    #         ...
    #       voice_samples/
    #         {id_0}_001.wav
    #         {id_0}_002.wav
    #         {id_0}_003.wav
    #         {id_1}_001.wav
    #         ...
    #     {speaker_uuid_1}/
    #       ...

    version = core_version or core_manager.latest_version()

    # Look up the requested character.
    core_characters = core_manager.get_core(version).characters
    characters = metas_store.load_combined_metas(core_characters)
    speakers = filter_characters_and_styles(characters, speaker_or_singer)
    speaker = next(
        (spk for spk in speakers if spk.speaker_uuid == speaker_uuid), None
    )
    if speaker is None:
        raise HTTPException(status_code=404, detail="該当する話者が見つかりません")

    def _resource_str(path: Path) -> str:
        # "url" asks the resource manager for a hash and turns it into a URL;
        # "base64" returns the encoded content unchanged.
        resource_str = resource_manager.resource_str(
            path, "hash" if resource_format == "url" else "base64"
        )
        if resource_format == "base64":
            return resource_str
        return f"{resource_baseurl}/{resource_str}"

    # Collect the character information.
    try:
        speaker_path = speaker_info_dir / speaker_uuid

        # Character policy
        policy = (speaker_path / "policy.md").read_text("utf-8")

        # Character portrait
        portrait = _resource_str(speaker_path / "portrait.png")

        # Per-style information
        style_infos = []
        for style in speaker.styles:
            style_id = style.id

            # Style icon (required)
            icon = _resource_str(speaker_path / "icons" / f"{style_id}.png")

            # Style portrait (optional: stays None when the file is absent)
            style_portrait_path = speaker_path / "portraits" / f"{style_id}.png"
            style_portrait = None
            if style_portrait_path.exists():
                style_portrait = _resource_str(style_portrait_path)

            # Voice samples: exactly three files numbered 001 to 003
            voice_samples: list[str] = [
                _resource_str(
                    speaker_path / "voice_samples" / f"{style_id}_{num:03d}.wav"
                )
                for num in range(1, 4)
            ]

            style_infos.append(
                {
                    "id": style_id,
                    "icon": icon,
                    "portrait": style_portrait,
                    "voice_samples": voice_samples,
                }
            )
    except (FileNotFoundError, ResourceManagerError) as err:
        msg = "追加情報が見つかりませんでした"
        # Chain the cause so the missing path stays visible in the traceback.
        raise HTTPException(status_code=500, detail=msg) from err

    return SpeakerInfo(policy=policy, portrait=portrait, style_infos=style_infos)

@router.get("/singers")
def singers(core_version: str | SkipJsonSchema[None] = None) -> list[Speaker]:
"""歌手情報の一覧を取得します"""
Expand All @@ -175,10 +72,12 @@ def singer_info(
指定されたspeaker_uuidの歌手に関する情報をjson形式で返します。
画像や音声はresource_formatで指定した形式で返されます。
"""
return _speaker_info(
version = core_version or core_manager.latest_version()
core = core_manager.get_core(version)
return metas_store.speaker_info(
speaker_uuid=speaker_uuid,
speaker_or_singer="singer",
core_version=core_version,
core_characters=core.characters,
resource_baseurl=resource_baseurl,
resource_format=resource_format,
)
Expand Down
113 changes: 111 additions & 2 deletions voicevox_engine/metas/MetasStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@

from dataclasses import dataclass
from pathlib import Path
from typing import Final, Literal
from typing import Final, Literal, TypeAlias

from fastapi import HTTPException
from pydantic import BaseModel, Field

from voicevox_engine.core.core_adapter import CoreCharacter, CoreCharacterStyle
from voicevox_engine.metas.Metas import (
Speaker,
SpeakerInfo,
SpeakerStyle,
SpeakerSupportedFeatures,
StyleId,
)
from voicevox_engine.resource_manager import ResourceManager, ResourceManagerError

ResourceFormat: TypeAlias = Literal["base64", "url"]


def cast_styles(cores: list[CoreCharacterStyle]) -> list[SpeakerStyle]:
Expand Down Expand Up @@ -68,13 +73,17 @@ class MetasStore:
話者やスタイルのメタ情報を管理する
"""

def __init__(self, engine_speakers_path: Path) -> None:
def __init__(
self, engine_speakers_path: Path, resource_manager: ResourceManager
) -> None:
"""
Parameters
----------
engine_speakers_path : Path
エンジンに含まれる話者メタ情報ディレクトリのパス。
"""
self._speakers_path = engine_speakers_path
self._resource_manager = resource_manager
# エンジンに含まれる各話者のメタ情報
self._loaded_metas: dict[str, _EngineSpeaker] = {
folder.name: _EngineSpeaker.model_validate_json(
Expand Down Expand Up @@ -111,6 +120,106 @@ def load_combined_metas(
)
return characters

def speaker_info(
    self,
    speaker_uuid: str,
    speaker_or_singer: Literal["speaker", "singer"],
    core_characters: list[CoreCharacter],
    resource_baseurl: str,
    resource_format: ResourceFormat,
) -> SpeakerInfo:
    """Build the additional information (policy, portrait, style assets) of one character.

    Parameters
    ----------
    speaker_uuid : str
        UUID of the character to look up.
    speaker_or_singer : Literal["speaker", "singer"]
        Passed to `filter_characters_and_styles` to narrow the candidates.
    core_characters : list[CoreCharacter]
        Characters reported by the core; combined with the engine metas.
    resource_baseurl : str
        Prefix prepended to the resource string when `resource_format` is "url".
    resource_format : ResourceFormat
        "base64" returns the resource string as is, "url" returns
        `{resource_baseurl}/{resource string}`.

    Returns
    -------
    SpeakerInfo
        Policy text, portrait and per-style information of the character.

    Raises
    ------
    HTTPException
        404 when no character matches `speaker_uuid`;
        500 when a required file or resource cannot be found.
    """
    # Character information must follow this directory layout:
    # {engine_speakers_path}/
    #   {speaker_uuid_0}/
    #     policy.md
    #     portrait.png
    #     icons/
    #       {id_0}.png
    #       {id_1}.png
    #       ...
    #     portraits/
    #       {id_0}.png
    #       {id_1}.png
    #       ...
    #     voice_samples/
    #       {id_0}_001.wav
    #       {id_0}_002.wav
    #       {id_0}_003.wav
    #       {id_1}_001.wav
    #       ...
    #   {speaker_uuid_1}/
    #     ...

    # Look up the requested character.
    characters = self.load_combined_metas(core_characters)
    speakers = filter_characters_and_styles(characters, speaker_or_singer)
    speaker = next(
        (spk for spk in speakers if spk.speaker_uuid == speaker_uuid), None
    )
    if speaker is None:
        # FIXME: HTTPException does not belong to this module's domain; stop using it here.
        raise HTTPException(status_code=404, detail="該当する話者が見つかりません")

    def _resource_str(path: Path) -> str:
        # "url" asks the resource manager for a hash and turns it into a URL;
        # "base64" returns the encoded content unchanged.
        resource_str = self._resource_manager.resource_str(
            path, "hash" if resource_format == "url" else "base64"
        )
        if resource_format == "base64":
            return resource_str
        return f"{resource_baseurl}/{resource_str}"

    # Collect the character information.
    try:
        speaker_path = self._speakers_path / speaker_uuid

        # Character policy
        policy = (speaker_path / "policy.md").read_text("utf-8")

        # Character portrait
        portrait = _resource_str(speaker_path / "portrait.png")

        # Per-style information
        style_infos = []
        for style in speaker.styles:
            style_id = style.id

            # Style icon (required)
            icon = _resource_str(speaker_path / "icons" / f"{style_id}.png")

            # Style portrait (optional: stays None when the file is absent)
            style_portrait_path = speaker_path / "portraits" / f"{style_id}.png"
            style_portrait = None
            if style_portrait_path.exists():
                style_portrait = _resource_str(style_portrait_path)

            # Voice samples: exactly three files numbered 001 to 003
            voice_samples: list[str] = [
                _resource_str(
                    speaker_path / "voice_samples" / f"{style_id}_{num:03d}.wav"
                )
                for num in range(1, 4)
            ]

            style_infos.append(
                {
                    "id": style_id,
                    "icon": icon,
                    "portrait": style_portrait,
                    "voice_samples": voice_samples,
                }
            )
    except (FileNotFoundError, ResourceManagerError) as err:
        # FIXME: HTTPException does not belong to this module's domain; stop using it here.
        msg = "追加情報が見つかりませんでした"
        # Chain the cause so the missing path stays visible in the traceback.
        raise HTTPException(status_code=500, detail=msg) from err

    return SpeakerInfo(policy=policy, portrait=portrait, style_infos=style_infos)

def talk_characters(self, core_characters: list[CoreCharacter]) -> list[Speaker]:
"""話せるキャラクターの情報の一覧を取得する。"""
characters = self.load_combined_metas(core_characters)
Expand Down