From 537d9b23225780af207a4f1f59ed65d9a34b4af6 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Fri, 5 Jul 2024 07:15:15 +0900 Subject: [PATCH] =?UTF-8?q?change:=20`Onnxruntime`=E5=9E=8B=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=81=97=E3=80=81=E3=81=9D=E3=81=93=E3=81=8B?= =?UTF-8?q?=E3=82=89`dlopen`/`LoadLibrary*`=E3=82=92=E8=A1=8C=E3=81=86=20(?= =?UTF-8?q?#802)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * change: `Onnxruntime`型を追加し、そこから`dlopen`/`LoadLibrary*`を行う * KotlinのexampleにFIXMEを追加 * "ONNX Runtimeを表す" → "AIエンジンの" * ドキュメントとコメントを追加 * フィーチャをリネーム * fixup! フィーチャをリネーム * fixup! フィーチャをリネーム * fixup! フィーチャをリネーム * fixup! フィーチャをリネーム * fixup! フィーチャをリネーム * voicevox-ortを更新 * `sed`のし損ねを修正 * ドキュメントやコメントを更新 * C APIに定数のgetterを追加 * voicevox-ortを更新 * "filename"を"ファイル名(モジュール名)もしくはファイルパス"に * C APIのドキュメントに"Availability"のセクションを追加 * フィーチャに関するドキュメントを更新 https://github.com/VOICEVOX/voicevox_core/pull/802#issuecomment-2198751933 * C APIのビルドに関するドキュメントのヘッダファイル周りを更新 * voicevox-ortを更新 * `JavaStr`を`CStr::to_str`しない * fixup! `JavaStr`を`CStr::to_str`しない * 不要な依存を削除 * `link-onnxruntime`で`cargo test`が通るようにする * fixup! `link-onnxruntime`で`cargo test`が通るようにする * fixup! 
`link-onnxruntime`で`cargo test`が通るようにする --- .github/workflows/build_and_deploy.yml | 40 +- .github/workflows/generate_document.yml | 4 +- .github/workflows/test.yml | 21 +- Cargo.lock | 124 +++++- Cargo.toml | 11 +- README.md | 12 +- crates/test_util/Cargo.toml | 2 + crates/test_util/build.rs | 34 ++ crates/test_util/src/lib.rs | 3 + crates/voicevox_core/Cargo.toml | 16 +- .../src/__internal/doctest_fixtures.rs | 12 +- crates/voicevox_core/src/blocking.rs | 11 +- crates/voicevox_core/src/devices.rs | 32 -- crates/voicevox_core/src/error.rs | 10 + crates/voicevox_core/src/infer.rs | 4 +- crates/voicevox_core/src/infer/runtimes.rs | 4 +- .../src/infer/runtimes/onnxruntime.rs | 385 +++++++++++++++++- crates/voicevox_core/src/infer/session_set.rs | 3 +- crates/voicevox_core/src/lib.rs | 62 +++ crates/voicevox_core/src/status.rs | 36 +- crates/voicevox_core/src/synthesizer.rs | 122 ++++-- crates/voicevox_core/src/tokio.rs | 11 +- crates/voicevox_core_c_api/Cargo.toml | 5 + crates/voicevox_core_c_api/build.rs | 1 + crates/voicevox_core_c_api/cbindgen.toml | 30 +- .../include/voicevox_core.h | 200 ++++++++- crates/voicevox_core_c_api/src/c_impls.rs | 70 +++- .../src/compatible_engine.rs | 25 +- crates/voicevox_core_c_api/src/helpers.rs | 19 +- crates/voicevox_core_c_api/src/lib.rs | 210 +++++++++- crates/voicevox_core_c_api/src/result_code.rs | 5 + .../tests/e2e/assert_cdylib.rs | 12 +- .../voicevox_core_c_api/tests/e2e/log_mask.rs | 7 + crates/voicevox_core_c_api/tests/e2e/main.rs | 1 + .../tests/e2e/snapshots.toml | 32 +- .../tests/e2e/testcases/compatible_engine.rs | 13 +- ...ble_engine_load_model_before_initialize.rs | 1 + .../tests/e2e/testcases/global_info.rs | 22 +- .../tests/e2e/testcases/simple_tts.rs | 11 + .../testcases/synthesizer_new_output_json.rs | 11 + .../e2e/testcases/tts_via_audio_query.rs | 11 + .../tests/e2e/testcases/user_dict_load.rs | 11 + .../e2e/testcases/user_dict_manipulate.rs | 1 + crates/voicevox_core_java_api/Cargo.toml | 3 +- 
.../jp/hiroshiba/voicevoxcore/GlobalInfo.java | 18 +- .../hiroshiba/voicevoxcore/Onnxruntime.java | 132 ++++++ .../hiroshiba/voicevoxcore/Synthesizer.java | 29 +- .../InitInferenceRuntimeException.java | 14 + .../jp/hiroshiba/voicevoxcore/InfoTest.java | 5 +- .../voicevoxcore/SynthesizerTest.java | 17 +- .../jp/hiroshiba/voicevoxcore/TestUtils.java | 10 + .../hiroshiba/voicevoxcore/UserDictTest.java | 3 +- crates/voicevox_core_java_api/src/common.rs | 1 + crates/voicevox_core_java_api/src/info.rs | 11 - crates/voicevox_core_java_api/src/lib.rs | 1 + .../voicevox_core_java_api/src/onnxruntime.rs | 56 +++ .../voicevox_core_java_api/src/synthesizer.rs | 10 +- crates/voicevox_core_python_api/Cargo.toml | 3 +- .../python/test/conftest.py | 8 + .../python/test/test_asyncio_metas.py | 7 +- .../test/test_asyncio_user_dict_load.py | 5 +- .../python/test/test_blocking_metas.py | 7 +- .../test/test_blocking_user_dict_load.py | 5 +- ...est_pseudo_raii_for_asyncio_synthesizer.py | 11 +- ...st_pseudo_raii_for_blocking_synthesizer.py | 11 +- .../python/test/test_type_stub_consts.py | 50 +++ .../python/voicevox_core/__init__.py | 4 +- .../python/voicevox_core/_load_dlls.py | 1 + .../python/voicevox_core/_rust/__init__.pyi | 22 +- .../python/voicevox_core/_rust/asyncio.pyi | 75 ++++ .../python/voicevox_core/_rust/blocking.pyi | 75 ++++ .../python/voicevox_core/asyncio.py | 4 +- .../python/voicevox_core/blocking.py | 4 +- .../voicevox_core_python_api/src/convert.rs | 9 +- crates/voicevox_core_python_api/src/lib.rs | 171 +++++++- docs/apis/c_api/doxygen/Doxyfile | 3 +- docs/feature-options.md | 26 ++ docs/usage.md | 6 +- example/cpp/unix/simple_tts.cpp | 11 +- example/cpp/windows/simple_tts/simple_tts.cpp | 11 +- example/kotlin/README.md | 9 +- example/kotlin/app/src/main/kotlin/app/App.kt | 9 +- example/python/run-asyncio.py | 31 +- example/python/run.py | 31 +- 84 files changed, 2253 insertions(+), 323 deletions(-) create mode 100644 
crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java create mode 100644 crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/exceptions/InitInferenceRuntimeException.java create mode 100644 crates/voicevox_core_java_api/src/onnxruntime.rs create mode 100644 crates/voicevox_core_python_api/python/test/test_type_stub_consts.py create mode 100644 docs/feature-options.md diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml index 8ac31c592..e9b5e9eb5 100644 --- a/.github/workflows/build_and_deploy.yml +++ b/.github/workflows/build_and_deploy.yml @@ -41,7 +41,14 @@ defaults: shell: bash jobs: - config: # 全 jobs で利用する定数の定義。実行対象の条件をフィルタリングする。 + # 全 jobs で利用する定数の定義。実行対象の条件をフィルタリングする。 + # + # c_release_format = plain-cdylib | ios-xcframework + # + # `plain-cdylib`の場合、動的ライブラリとその付属物をZIPに固めたものをC APIとしてリリースする。 + # `ios-xcframework`の場合はiOS用のXCFrameworkをC APIとしてリリースする。また、ONNX Runtimeの + # リンク方法に関わるCargoフィーチャも`c_release_format`によって選択される。 + config: runs-on: ubuntu-latest outputs: includes: ${{ steps.strategy_matrix.outputs.includes }} @@ -57,6 +64,7 @@ jobs: "features": "", "target": "x86_64-pc-windows-msvc", "artifact_name": "windows-x64-cpu", + "c_release_format": "plain-cdylib", "whl_local_version": "cpu", "can_skip_in_simple_test": true }, @@ -65,6 +73,7 @@ jobs: "features": "directml", "target": "x86_64-pc-windows-msvc", "artifact_name": "windows-x64-directml", + "c_release_format": "plain-cdylib", "whl_local_version": "directml", "can_skip_in_simple_test": false }, @@ -73,6 +82,7 @@ jobs: "features": "cuda", "target": "x86_64-pc-windows-msvc", "artifact_name": "windows-x64-cuda", + "c_release_format": "plain-cdylib", "whl_local_version": "cuda", "can_skip_in_simple_test": true }, @@ -81,6 +91,7 @@ jobs: "features": "", "target": "i686-pc-windows-msvc", "artifact_name": "windows-x86-cpu", + "c_release_format": "plain-cdylib", "whl_local_version": "cpu", "can_skip_in_simple_test": 
true }, @@ -89,6 +100,7 @@ jobs: "features": "", "target": "x86_64-unknown-linux-gnu", "artifact_name": "linux-x64-cpu", + "c_release_format": "plain-cdylib", "whl_local_version": "cpu", "can_skip_in_simple_test": true }, @@ -97,6 +109,7 @@ jobs: "features": "cuda", "target": "x86_64-unknown-linux-gnu", "artifact_name": "linux-x64-gpu", + "c_release_format": "plain-cdylib", "whl_local_version": "cuda", "can_skip_in_simple_test": false }, @@ -105,6 +118,7 @@ jobs: "features": "", "target": "aarch64-unknown-linux-gnu", "artifact_name": "linux-arm64-cpu", + "c_release_format": "plain-cdylib", "whl_local_version": "cpu", "can_skip_in_simple_test": true }, @@ -113,6 +127,7 @@ jobs: "features": "", "target": "aarch64-linux-android", "artifact_name": "android-arm64-cpu", + "c_release_format": "plain-cdylib", "can_skip_in_simple_test": true }, { @@ -120,6 +135,7 @@ jobs: "features": "", "target": "x86_64-linux-android", "artifact_name": "android-x86_64-cpu", + "c_release_format": "plain-cdylib", "can_skip_in_simple_test": true }, { @@ -127,6 +143,7 @@ jobs: "features": "", "target": "aarch64-apple-darwin", "artifact_name": "osx-arm64-cpu", + "c_release_format": "plain-cdylib", "whl_local_version": "cpu", "can_skip_in_simple_test": false }, @@ -135,6 +152,7 @@ jobs: "features": "", "target": "x86_64-apple-darwin", "artifact_name": "osx-x64-cpu", + "c_release_format": "plain-cdylib", "whl_local_version": "cpu", "can_skip_in_simple_test": true }, @@ -143,6 +161,7 @@ jobs: "features": "", "target": "aarch64-apple-ios", "artifact_name": "ios-arm64-cpu", + "c_release_format": "ios-xcframework", "can_skip_in_simple_test": true }, { @@ -150,6 +169,7 @@ jobs: "features": "", "target": "aarch64-apple-ios-sim", "artifact_name": "ios-arm64-cpu-sim", + "c_release_format": "ios-xcframework", "can_skip_in_simple_test": true }, { @@ -157,6 +177,7 @@ jobs: "features": "", "target": "x86_64-apple-ios", "artifact_name": "ios-x64-cpu", + "c_release_format": "ios-xcframework", 
"can_skip_in_simple_test": true } ]' @@ -244,8 +265,12 @@ jobs: - name: build voicevox_core_c_api shell: bash run: | + case ${{ matrix.c_release_format }} in + plain-cdylib) linking=load-onnxruntime ;; + ios-xcframework) linking=link-onnxruntime ;; + esac function build() { - cargo build -p voicevox_core_c_api -vv --features ${{ matrix.features }}, --target ${{ matrix.target }} --release + cargo build -p voicevox_core_c_api -vv --features "$linking",${{ matrix.features }} --target ${{ matrix.target }} --release } if ${{ !inputs.is_production }}; then build @@ -285,7 +310,12 @@ jobs: - name: Organize artifact run: | mkdir -p "artifact/${{ env.ASSET_NAME }}" - cp -v crates/voicevox_core_c_api/include/voicevox_core.h "artifact/${{ env.ASSET_NAME }}" + case ${{ matrix.c_release_format }} in + plain-cdylib) feature=VOICEVOX_LOAD_ONNXRUNTIME ;; + ios-xcframework) feature=VOICEVOX_LINK_ONNXRUNTIME ;; + esac + sed 's:^//\(#define '"$feature"'\)$:\1:' crates/voicevox_core_c_api/include/voicevox_core.h \ + > "artifact/${{ env.ASSET_NAME }}/voicevox_core.h" cp -v target/${{ matrix.target }}/release/*voicevox_core.{dll,so,dylib} "artifact/${{ env.ASSET_NAME }}" || true cp -v target/${{ matrix.target }}/release/voicevox_core.dll.lib "artifact/${{ env.ASSET_NAME }}/voicevox_core.lib" || true cp -v -n target/${{ matrix.target }}/release/{,lib}onnxruntime*.{dll,so.*,so,dylib} "artifact/${{ env.ASSET_NAME }}" || true @@ -305,7 +335,7 @@ jobs: ESIGNERCKA_PASSWORD: ${{ secrets.ESIGNERCKA_PASSWORD }} ESIGNERCKA_TOTP_SECRET: ${{ secrets.ESIGNERCKA_TOTP_SECRET }} - name: Upload artifact to build XCFramework - if: contains(matrix.target, 'ios') + if: matrix.c_release_format == 'ios-xcframework' uses: actions/upload-artifact@v4 with: name: voicevox_core-${{ matrix.target }} @@ -315,7 +345,7 @@ jobs: cd artifact 7z a "../${{ env.ASSET_NAME }}.zip" "${{ env.ASSET_NAME }}" - name: Upload to Release - if: fromJson(needs.config.outputs.deploy) && !contains(matrix.target, 'ios') + if: 
fromJson(needs.config.outputs.deploy) && matrix.c_release_format == 'plain-cdylib' uses: softprops/action-gh-release@v2 with: prerelease: true diff --git a/.github/workflows/generate_document.yml b/.github/workflows/generate_document.yml index 039800df1..c4af6b80b 100644 --- a/.github/workflows/generate_document.yml +++ b/.github/workflows/generate_document.yml @@ -64,9 +64,7 @@ jobs: with: working-directory: "docs/apis/c_api/doxygen" - name: Build voicevox_core_python_api - run: | - cargo build -p voicevox_core_c_api -vv - maturin develop --manifest-path ./crates/voicevox_core_python_api/Cargo.toml --locked + run: maturin develop --manifest-path ./crates/voicevox_core_python_api/Cargo.toml --locked # https://github.com/readthedocs/sphinx-autoapi/issues/405 - name: Workaround to make Sphinx recognize `_rust` as a module run: touch ./crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 14bfbddd0..8550fbfca 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,8 +72,10 @@ jobs: with: python-version: "3.8" - uses: Swatinem/rust-cache@v2 - - run: cargo clippy -vv --all-features --tests -- -D clippy::all -D warnings --no-deps - - run: cargo clippy -vv --all-features -- -D clippy::all -D warnings --no-deps + - run: cargo clippy -vv --features directml,cuda --tests -- -D clippy::all -D warnings --no-deps + - run: cargo clippy -vv --features directml,cuda -- -D clippy::all -D warnings --no-deps + - run: cargo clippy -vv -p voicevox_core -p voicevox_core_c_api --features link-onnxruntime,directml,cuda --tests -- -D clippy::all -D warnings --no-deps + - run: cargo clippy -vv -p voicevox_core -p voicevox_core_c_api --features link-onnxruntime,directml,cuda -- -D clippy::all -D warnings --no-deps - run: cargo fmt -- --check rust-unit-test: @@ -134,7 +136,7 @@ jobs: - uses: Swatinem/rust-cache@v2 with: key: "cargo-integration-test-cache-${{ matrix.features 
}}-${{ matrix.os }}" - - name: Run cargo integration test + - name: Run cargo integration test (load-onnxruntime) run: RUST_BACKTRACE=full cargo test --test "*" -vv --features ,${{ matrix.features }} -- --include-ignored c-header: @@ -193,11 +195,13 @@ jobs: - name: Install cargo-binstall uses: taiki-e/install-action@cargo-binstall - name: build voicevox_core_c_api - run: cargo build -p voicevox_core_c_api -vv + run: cargo build -p voicevox_core_c_api --features load-onnxruntime -vv - name: 必要なfileをunix用exampleのディレクトリに移動させる run: | mkdir -p example/cpp/unix/voicevox_core/ - cp -v crates/voicevox_core_c_api/include/voicevox_core.h example/cpp/unix/voicevox_core/ + sed 's:^//\(#define VOICEVOX_LOAD_ONNXRUNTIME\)$:\1:' \ + crates/voicevox_core_c_api/include/voicevox_core.h \ + > example/cpp/unix/voicevox_core/voicevox_core.h cp -v target/debug/libvoicevox_core.{so,dylib} example/cpp/unix/voicevox_core/ || true cp -v target/debug/libonnxruntime.so.* example/cpp/unix/voicevox_core/ || true cp -v target/debug/libonnxruntime.*.dylib example/cpp/unix/voicevox_core/ || true @@ -235,11 +239,14 @@ jobs: - name: Install cargo-binstall uses: taiki-e/install-action@cargo-binstall - name: build voicevox_core_c_api - run: cargo build -p voicevox_core_c_api -vv + run: cargo build -p voicevox_core_c_api --features load-onnxruntime -vv - name: 必要なfileをexampleのディレクトリに移動させる + shell: bash run: | mkdir -p example/cpp/windows/simple_tts/lib/x64 - cp -v crates/voicevox_core_c_api/include/voicevox_core.h example/cpp/windows/simple_tts/ + sed 's:^//\(#define VOICEVOX_LOAD_ONNXRUNTIME\)$:\1:' \ + crates/voicevox_core_c_api/include/voicevox_core.h \ + > example/cpp/windows/simple_tts/voicevox_core.h cp target/debug/voicevox_core.dll.lib example/cpp/windows/simple_tts/lib/x64/voicevox_core.lib - name: Add MSBuild to PATH diff --git a/Cargo.lock b/Cargo.lock index d3846b8a0..a612d57e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -362,6 +362,32 @@ name = "camino" version = "1.1.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo-platform" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror", +] [[package]] name = "cbindgen" @@ -443,7 +469,7 @@ checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a" dependencies = [ "glob", "libc", - "libloading", + "libloading 0.7.3", ] [[package]] @@ -599,6 +625,26 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "const_format" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -1100,10 +1146,11 @@ dependencies = [ [[package]] name = "fs-err" -version = "2.9.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0845fa252299212f0389d64ba26f34fa32cfe41588355f21ed507c59a0f64541" +checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" dependencies = [ + "autocfg", "tokio", ] @@ -1675,6 +1722,16 @@ dependencies = [ 
"winapi", ] +[[package]] +name = "libloading" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if", + "windows-targets 0.48.0", +] + [[package]] name = "libm" version = "0.2.6" @@ -1990,9 +2047,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "opaque-debug" @@ -2502,6 +2559,26 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "ref-cast" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "regex" version = "1.10.0" @@ -2793,21 +2870,24 @@ name = "semver" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" +dependencies = [ + "serde", +] [[package]] name = "serde" -version = "1.0.164" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.164" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", @@ -3136,16 +3216,18 @@ dependencies = [ "anyhow", "bindgen 0.69.4", "camino", + "cargo_metadata", "flate2", "fs-err", "indoc", - "libloading", + "libloading 0.7.3", "once_cell", "reqwest", "serde", "serde_json", "tar", "tokio", + "voicevox-ort", "zip", ] @@ -3465,6 +3547,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + [[package]] name = "unindent" version = "0.2.3" @@ -3535,11 +3623,14 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "voicevox-ort" version = "2.0.0-rc.2" -source = "git+https://github.com/VOICEVOX/ort.git?rev=a2d6ae22327869e896bf4c16828734d09516d2d9#a2d6ae22327869e896bf4c16828734d09516d2d9" +source = "git+https://github.com/VOICEVOX/ort.git?rev=07c047c449b959d8f76593046e139bae520d59c3#07c047c449b959d8f76593046e139bae520d59c3" dependencies = [ + "anyhow", "half", "js-sys", + "libloading 0.8.3", "ndarray", + "once_cell", "thiserror", "tracing", "voicevox-ort-sys", @@ -3549,7 +3640,7 @@ dependencies = [ [[package]] name = "voicevox-ort-sys" version = "2.0.0-rc.2" -source = "git+https://github.com/VOICEVOX/ort.git?rev=a2d6ae22327869e896bf4c16828734d09516d2d9#a2d6ae22327869e896bf4c16828734d09516d2d9" +source = "git+https://github.com/VOICEVOX/ort.git?rev=07c047c449b959d8f76593046e139bae520d59c3#07c047c449b959d8f76593046e139bae520d59c3" dependencies = [ "flate2", "sha2", @@ -3564,6 +3655,7 @@ dependencies = [ "anyhow", "async_zip", "camino", + "const_format", "derive-getters", "derive-new", 
"derive_more", @@ -3584,6 +3676,7 @@ dependencies = [ "ouroboros", "pretty_assertions", "rayon", + "ref-cast", "regex", "rstest", "rstest_reuse", @@ -3616,6 +3709,7 @@ dependencies = [ "chrono", "clap 4.0.10", "colorchoice", + "const_format", "cstr", "derive-getters", "duct", @@ -3624,12 +3718,13 @@ dependencies = [ "inventory", "itertools 0.10.5", "libc", - "libloading", + "libloading 0.7.3", "libtest-mimic", "ndarray", "ndarray-stats", "once_cell", "process_path", + "ref-cast", "regex", "serde", "serde_json", @@ -3642,6 +3737,7 @@ dependencies = [ "tracing-subscriber", "typetag", "uuid", + "voicevox-ort", "voicevox_core", ] @@ -3652,6 +3748,7 @@ dependencies = [ "android_logger", "chrono", "derive_more", + "duplicate", "easy-ext", "jni", "once_cell", @@ -3681,6 +3778,7 @@ dependencies = [ "camino", "easy-ext", "log", + "once_cell", "pyo3", "pyo3-asyncio", "pyo3-log", diff --git a/Cargo.toml b/Cargo.toml index 4ea3d3757..b467c583e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,11 +13,13 @@ bindgen = "0.69.4" binstall-tar = "0.4.39" bytes = "1.1.0" camino = "1.1.6" +cargo_metadata = "0.18.1" cbindgen = "0.24.3" chrono = { version = "0.4.26", default-features = false } clap = "4.0.10" color-eyre = "0.6.2" colorchoice = "1.0.0" +const_format = "0.2.32" cstr = "0.2.11" # https://github.com/dtolnay/syn/issues/1502 derive-getters = "0.2.0" derive-new = "0.5.9" @@ -29,7 +31,7 @@ educe = "0.4.23" enum-map = "3.0.0-beta.1" eyre = "0.6.8" flate2 = "1.0.25" -fs-err = "2.9.0" +fs-err = "2.11.0" futures = "0.3.26" futures-core = "0.3.25" futures-util = "0.3.25" @@ -50,7 +52,7 @@ log = "0.4.17" ndarray = "0.15.6" ndarray-stats = "0.5.1" octocrab = { version = "0.19.0", default-features = false } -once_cell = "1.18.0" +once_cell = "1.19.0" ouroboros = "0.18.0" parse-display = "0.8.2" pretty_assertions = "1.3.0" @@ -60,11 +62,12 @@ pyo3-asyncio = "0.20.0" pyo3-log = "0.9.0" quote = "1.0.33" rayon = "1.6.1" +ref-cast = "1.0.23" regex = "1.10.0" reqwest = { version = "0.11.13", 
default-features = false } rstest = "0.15.0" rstest_reuse = "0.6.0" -serde = "1.0.145" +serde = "1.0.203" serde_json = "1.0.85" serde_with = "3.3.0" smallvec = "1.13.1" @@ -87,7 +90,7 @@ zip = "0.6.3" [workspace.dependencies.voicevox-ort] git = "https://github.com/VOICEVOX/ort.git" -rev = "a2d6ae22327869e896bf4c16828734d09516d2d9" +rev = "07c047c449b959d8f76593046e139bae520d59c3" [workspace.dependencies.open_jtalk] git = "https://github.com/VOICEVOX/open_jtalk-rs.git" diff --git a/README.md b/README.md index 4024e9721..ae60f587b 100644 --- a/README.md +++ b/README.md @@ -150,10 +150,18 @@ model フォルダにある onnx モデルはダミーのため、ノイズの ```bash # DLLをビルド -cargo build --release -p voicevox_core_c_api +cargo build --release -p voicevox_core_c_api --features load-onnxruntime ``` -DLL 用のヘッダファイルは [crates/voicevox_core_c_api/include/voicevox_core.h](https://github.com/VOICEVOX/voicevox_core/tree/main/crates/voicevox_core_c_api/include/voicevox_core.h) にあります。 +DLL 用のヘッダファイルの雛形は [crates/voicevox_core_c_api/include/voicevox_core.h](https://github.com/VOICEVOX/voicevox_core/tree/main/crates/voicevox_core_c_api/include/voicevox_core.h) にあります。 +詳しくは[feature-options.md](./docs/feature-options.md)を参照してください。 + +```bash +# ヘッダファイルを加工し、マクロ`VOICEVOX_LOAD_ONNXRUNTIME`を宣言 +sed 's:^//\(#define VOICEVOX_LOAD_ONNXRUNTIME\)$:\1:' \ + crates/voicevox_core_c_api/include/voicevox_core.h \ + > ./voicevox_core.h +``` ## コアライブラリのテスト diff --git a/crates/test_util/Cargo.toml b/crates/test_util/Cargo.toml index d113b57ce..fe0c2f6dd 100644 --- a/crates/test_util/Cargo.toml +++ b/crates/test_util/Cargo.toml @@ -13,6 +13,7 @@ anyhow.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } bindgen.workspace = true camino.workspace = true +cargo_metadata.workspace = true flate2.workspace = true fs-err.workspace = true indoc.workspace = true @@ -20,6 +21,7 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = ["preserve_order"] } reqwest = { 
workspace = true, features = ["rustls-tls"] } tar.workspace = true +voicevox-ort.workspace = true zip.workspace = true [lints.rust] diff --git a/crates/test_util/build.rs b/crates/test_util/build.rs index 79e3bc90e..3cdc88d78 100644 --- a/crates/test_util/build.rs +++ b/crates/test_util/build.rs @@ -6,6 +6,7 @@ use std::{ use anyhow::{anyhow, ensure}; use camino::{Utf8Path, Utf8PathBuf}; +use cargo_metadata::MetadataCommand; use flate2::read::GzDecoder; use indoc::formatdoc; use tar::Archive; @@ -27,6 +28,8 @@ async fn main() -> anyhow::Result<()> { ensure!(dic_dir.exists(), "`{dic_dir}` does not exist"); } + copy_onnxruntime(out_dir.as_ref(), dist)?; + create_sample_voice_model_file(out_dir, dist)?; generate_example_data_json(dist.as_ref())?; @@ -92,6 +95,35 @@ fn create_sample_voice_model_file(out_dir: &Utf8Path, dist: &Utf8Path) -> anyhow Ok(()) } +fn copy_onnxruntime(out_dir: &Path, dist: &Utf8Path) -> anyhow::Result<()> { + use std::env::consts::{DLL_PREFIX, DLL_SUFFIX}; + + let cargo_metadata::Metadata { + target_directory, .. 
+ } = MetadataCommand::new() + .manifest_path(Path::new(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml")) + .exec()?; + + const VERSION: &str = ort::downloaded_version!(); + let filename = &if cfg!(target_os = "linux") { + format!("libonnxruntime.so.{VERSION}") + } else if cfg!(any(target_os = "macos", target_os = "ios")) { + format!("libonnxruntime.{VERSION}.dylib") + } else { + format!("{DLL_PREFIX}onnxruntime{DLL_SUFFIX}") + }; + let src = &target_directory.join("debug").join(filename); + let dst_dir = &dist.join("lib"); + let dst = &dst_dir.join(filename); + fs_err::create_dir_all(dst_dir)?; + fs_err::copy(src, dst)?; + println!("cargo:rerun-if-changed={src}"); + + fs_err::write(out_dir.join("onnxruntime-dylib-path.txt"), dst.as_str())?; + + Ok(()) +} + /// OpenJTalkの辞書をダウンロードして展開する。 async fn download_open_jtalk_dict(dist: &Path) -> anyhow::Result<()> { let download_url = format!( @@ -187,6 +219,8 @@ fn generate_c_api_rs_bindings(out_dir: &Utf8Path) -> anyhow::Result<()> { bindgen::Builder::default() .header(C_BINDINGS_PATH) .header(ADDITIONAL_C_BINDINGS_PATH) + // we test for `--features load-onnxruntime` + .clang_arg("-DVOICEVOX_LOAD_ONNXRUNTIME=") .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .dynamic_library_name("CApi") .generate()? 
diff --git a/crates/test_util/src/lib.rs b/crates/test_util/src/lib.rs index f234d7f76..6473e438e 100644 --- a/crates/test_util/src/lib.rs +++ b/crates/test_util/src/lib.rs @@ -23,6 +23,9 @@ pub use self::typing::{ DecodeExampleData, DurationExampleData, ExampleData, IntonationExampleData, }; +pub const ONNXRUNTIME_DYLIB_PATH: &str = + include_str!(concat!(env!("OUT_DIR"), "/onnxruntime-dylib-path.txt")); + pub const OPEN_JTALK_DIC_DIR: &str = concat!( env!("CARGO_MANIFEST_DIR"), "/data/open_jtalk_dic_utf_8-1.11" diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml index 9957c1373..527fa7494 100644 --- a/crates/voicevox_core/Cargo.toml +++ b/crates/voicevox_core/Cargo.toml @@ -4,8 +4,18 @@ version.workspace = true edition.workspace = true publish.workspace = true +[package.metadata.docs.rs] +features = ["load-onnxruntime", "link-onnxruntime"] +rustdoc-args = ["--cfg", "docsrs"] + [features] default = [] + +# ONNX Runtimeのリンク方法を決めるフィーチャ(rustdocを参照)。 +load-onnxruntime = ["voicevox-ort/load-dynamic"] +link-onnxruntime = [] + +# GPUを利用可能にするフィーチャ(rustdocを参照)。 cuda = ["voicevox-ort/cuda"] directml = ["voicevox-ort/directml"] @@ -13,6 +23,7 @@ directml = ["voicevox-ort/directml"] anyhow.workspace = true async_zip = { workspace = true, features = ["deflate"] } camino.workspace = true +const_format.workspace = true derive-getters.workspace = true derive-new.workspace = true derive_more.workspace = true @@ -30,6 +41,7 @@ once_cell.workspace = true open_jtalk.workspace = true ouroboros.workspace = true rayon.workspace = true +ref-cast.workspace = true regex.workspace = true serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true, features = ["preserve_order"] } @@ -41,8 +53,8 @@ thiserror.workspace = true tokio = { workspace = true, features = ["rt"] } # FIXME: feature-gateする tracing.workspace = true uuid = { workspace = true, features = ["v4", "serde"] } +voicevox-ort = { workspace = true, features = 
["download-binaries", "__init-for-voicevox"] } voicevox_core_macros = { path = "../voicevox_core_macros" } -voicevox-ort = { workspace = true, features = ["ndarray", "download-binaries"] } zip.workspace = true [dev-dependencies] @@ -51,7 +63,7 @@ pretty_assertions.workspace = true rstest.workspace = true rstest_reuse.workspace = true test_util.workspace = true -tokio = { workspace = true, features = ["rt", "macros"] } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } [target."cfg(windows)".dependencies] humansize.workspace = true diff --git a/crates/voicevox_core/src/__internal/doctest_fixtures.rs b/crates/voicevox_core/src/__internal/doctest_fixtures.rs index f314845fe..8f45cba73 100644 --- a/crates/voicevox_core/src/__internal/doctest_fixtures.rs +++ b/crates/voicevox_core/src/__internal/doctest_fixtures.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::{ffi::OsString, path::Path}; use camino::Utf8Path; @@ -6,9 +6,19 @@ use crate::{AccelerationMode, InitializeOptions}; pub async fn synthesizer_with_sample_voice_model( voice_model_path: impl AsRef, + #[cfg_attr(feature = "link-onnxruntime", allow(unused_variables))] onnxruntime_dylib_path: impl Into< + OsString, + >, open_jtalk_dic_dir: impl AsRef, ) -> anyhow::Result> { let syntesizer = crate::tokio::Synthesizer::new( + #[cfg(feature = "load-onnxruntime")] + crate::tokio::Onnxruntime::load_once() + .filename(onnxruntime_dylib_path) + .exec() + .await?, + #[cfg(feature = "link-onnxruntime")] + crate::tokio::Onnxruntime::init_once().await?, crate::tokio::OpenJtalk::new(open_jtalk_dic_dir).await?, &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, diff --git a/crates/voicevox_core/src/blocking.rs b/crates/voicevox_core/src/blocking.rs index aa600c598..8d0bc2129 100644 --- a/crates/voicevox_core/src/blocking.rs +++ b/crates/voicevox_core/src/blocking.rs @@ -1,6 +1,13 @@ //! 
ブロッキング版API。 pub use crate::{ - engine::open_jtalk::blocking::OpenJtalk, synthesizer::blocking::Synthesizer, - user_dict::dict::blocking::UserDict, voice_model::blocking::VoiceModel, + engine::open_jtalk::blocking::OpenJtalk, infer::runtimes::onnxruntime::blocking::Onnxruntime, + synthesizer::blocking::Synthesizer, user_dict::dict::blocking::UserDict, + voice_model::blocking::VoiceModel, }; + +pub mod onnxruntime { + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub use crate::infer::runtimes::onnxruntime::blocking::LoadOnce; +} diff --git a/crates/voicevox_core/src/devices.rs b/crates/voicevox_core/src/devices.rs index 54b3de2f7..140105962 100644 --- a/crates/voicevox_core/src/devices.rs +++ b/crates/voicevox_core/src/devices.rs @@ -1,8 +1,6 @@ use derive_getters::Getters; use serde::{Deserialize, Serialize}; -use crate::{infer::InferenceRuntime, synthesizer::InferenceRuntimeImpl, Result}; - /// このライブラリで利用可能なデバイスの情報。 /// /// あくまで本ライブラリが対応しているデバイスの情報であることに注意。GPUが使える環境ではなかったと @@ -30,37 +28,7 @@ pub struct SupportedDevices { } impl SupportedDevices { - /// `SupportedDevices`をコンストラクトする。 - /// - /// # Example - /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] - /// use voicevox_core::SupportedDevices; - /// - /// let supported_devices = SupportedDevices::create()?; - /// # - /// # Result::<_, anyhow::Error>::Ok(()) - /// ``` - pub fn create() -> Result { - ::supported_devices() - } - pub fn to_json(&self) -> serde_json::Value { serde_json::to_value(self).expect("should not fail") } } - -#[cfg(test)] -mod tests { - use rstest::rstest; - - use super::SupportedDevices; - - #[rstest] - fn supported_devices_create_works() { - let result = SupportedDevices::create(); - // 環境によって結果が変わるので、関数呼び出しが成功するかどうかの確認のみ行う - assert!(result.is_ok(), "{result:?}"); - } -} diff --git a/crates/voicevox_core/src/error.rs 
b/crates/voicevox_core/src/error.rs index 916964429..d0e7fced0 100644 --- a/crates/voicevox_core/src/error.rs +++ b/crates/voicevox_core/src/error.rs @@ -34,6 +34,7 @@ impl Error { match &self.0 { ErrorRepr::NotLoadedOpenjtalkDict => ErrorKind::NotLoadedOpenjtalkDict, ErrorRepr::GpuSupport => ErrorKind::GpuSupport, + ErrorRepr::InitInferenceRuntime { .. } => ErrorKind::InitInferenceRuntime, ErrorRepr::LoadModel(LoadModelError { context, .. }) => match context { LoadModelErrorKind::OpenZipFile => ErrorKind::OpenZipFile, LoadModelErrorKind::ReadZipEntry { .. } => ErrorKind::ReadZipEntry, @@ -65,6 +66,13 @@ pub(crate) enum ErrorRepr { #[error("GPU機能をサポートすることができません")] GpuSupport, + #[error("{runtime_display_name}のロードまたは初期化ができませんでした")] + InitInferenceRuntime { + runtime_display_name: &'static str, + #[source] + source: anyhow::Error, + }, + #[error(transparent)] LoadModel(#[from] LoadModelError), @@ -119,6 +127,8 @@ pub enum ErrorKind { NotLoadedOpenjtalkDict, /// GPUモードがサポートされていない。 GpuSupport, + /// 推論ライブラリのロードまたは初期化ができなかった。 + InitInferenceRuntime, /// ZIPファイルを開くことに失敗した。 OpenZipFile, /// ZIP内のファイルが読めなかった。 diff --git a/crates/voicevox_core/src/infer.rs b/crates/voicevox_core/src/infer.rs index c2cad1d7d..cffd0d524 100644 --- a/crates/voicevox_core/src/infer.rs +++ b/crates/voicevox_core/src/infer.rs @@ -18,10 +18,12 @@ pub(crate) trait InferenceRuntime: 'static { type Session: Sized + Send + 'static; type RunContext<'a>: From<&'a mut Self::Session> + PushInputTensor; - fn supported_devices() -> crate::Result; + /// このライブラリで利用可能なデバイスの情報を取得する。 + fn supported_devices(&self) -> crate::Result; #[allow(clippy::type_complexity)] fn new_session( + &self, model: impl FnOnce() -> std::result::Result, DecryptModelError>, options: InferenceSessionOptions, ) -> anyhow::Result<( diff --git a/crates/voicevox_core/src/infer/runtimes.rs b/crates/voicevox_core/src/infer/runtimes.rs index 7934027b6..e9d3d31c4 100644 --- a/crates/voicevox_core/src/infer/runtimes.rs +++ 
b/crates/voicevox_core/src/infer/runtimes.rs @@ -1,3 +1 @@ -mod onnxruntime; - -pub(crate) use self::onnxruntime::Onnxruntime; +pub(crate) mod onnxruntime; diff --git a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs index f8f376837..74dc8a601 100644 --- a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs +++ b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs @@ -15,17 +15,14 @@ use super::super::{ OutputScalarKind, OutputTensor, ParamInfo, PushInputTensor, }; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] -pub(crate) enum Onnxruntime {} - -impl InferenceRuntime for Onnxruntime { +// TODO: `trait AsyncRuntime`みたいなものを作って抽象化しながら同期版と非同期版に別個の役割を +// 持たせる +// (なぜそうしたいかの理由の一つとしては) +impl InferenceRuntime for self::blocking::Onnxruntime { type Session = ort::Session; type RunContext<'a> = OnnxruntimeRunContext<'a>; - fn supported_devices() -> crate::Result { - // TODO: `InferenceRuntime::init`と`InitInferenceRuntimeError`を作る - build_ort_env_once().unwrap(); - + fn supported_devices(&self) -> crate::Result { (|| { let cpu = CPUExecutionProvider::default().is_available()?; let cuda = CUDAExecutionProvider::default().is_available()?; @@ -44,6 +41,7 @@ impl InferenceRuntime for Onnxruntime { } fn new_session( + &self, model: impl FnOnce() -> std::result::Result, DecryptModelError>, options: InferenceSessionOptions, ) -> anyhow::Result<( @@ -51,9 +49,6 @@ impl InferenceRuntime for Onnxruntime { Vec>, Vec>, )> { - // TODO: `InferenceRuntime::init`と`InitInferenceRuntimeError`を作る - build_ort_env_once().unwrap(); - let mut builder = ort::Session::builder()? .with_optimization_level(GraphOptimizationLevel::Level1)? 
.with_intra_threads(options.cpu_num_threads.into())?; @@ -181,12 +176,6 @@ impl InferenceRuntime for Onnxruntime { } } -fn build_ort_env_once() -> ort::Result<()> { - static ONCE: once_cell::sync::OnceCell<()> = once_cell::sync::OnceCell::new(); - ONCE.get_or_try_init(|| ort::init().with_name(env!("CARGO_PKG_NAME")).commit())?; - Ok(()) -} - pub(crate) struct OnnxruntimeRunContext<'sess> { sess: &'sess ort::Session, inputs: Vec>, @@ -225,3 +214,365 @@ impl PushInputTensor for OnnxruntimeRunContext<'_> { self.push_input(tensor) } } + +pub(crate) mod blocking { + use ort::EnvHandle; + use ref_cast::{ref_cast_custom, RefCastCustom}; + + use crate::{error::ErrorRepr, SupportedDevices}; + + use super::super::super::InferenceRuntime; + + /// ONNX Runtime。 + /// + /// シングルトンであり、インスタンスは高々一つ。 + /// + /// # Rust APIにおけるインスタンスの共有 + /// + /// インスタンスは[voicevox-ort]側に作られる。Rustのクレートとしてこのライブラリを利用する場合、 + /// Tokio版APIやvoicevox-ortを利用する他クレートともインスタンスが共有される。 + /// + #[cfg_attr(feature = "load-onnxruntime", doc = "```")] + #[cfg_attr(not(feature = "load-onnxruntime"), doc = "```compile_fail")] + /// # use voicevox_core as another_lib; + /// # + /// # fn main() -> anyhow::Result<()> { + /// # if cfg!(windows) { + /// # // Windows\System32\onnxruntime.dllを回避 + /// # voicevox_core::blocking::Onnxruntime::load_once() + /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + /// # .exec()?; + /// # } + /// let ort1 = voicevox_core::blocking::Onnxruntime::load_once().exec()?; + /// let ort2 = another_lib::tokio::Onnxruntime::get().expect("`ort1`と同一のはず"); + /// assert_eq!(ptr_addr(ort1), ptr_addr(ort2)); + /// + /// fn ptr_addr(obj: &impl Sized) -> usize { + /// obj as *const _ as _ + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// [voicevox-ort]: https://github.com/VOICEVOX/ort + #[derive(Debug, RefCastCustom)] + #[repr(transparent)] + pub struct Onnxruntime { + _inner: EnvHandle, + } + + impl Onnxruntime { + /// ONNX Runtimeのライブラリ名。 + #[cfg(feature = "load-onnxruntime")] + 
#[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub const LIB_NAME: &'static str = "onnxruntime"; + + /// 推奨されるONNX Runtimeのバージョン。 + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub const LIB_VERSION: &'static str = ort::downloaded_version!(); + + /// [`LIB_NAME`]と[`LIB_VERSION`]からなる動的ライブラリのファイル名。 + /// + /// WindowsとAndroidでは[`LIB_UNVERSIONED_FILENAME`]と同じ。 + /// + /// [`LIB_NAME`]: Self::LIB_NAME + /// [`LIB_VERSION`]: Self::LIB_VERSION + /// [`LIB_UNVERSIONED_FILENAME`]: Self::LIB_UNVERSIONED_FILENAME + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub const LIB_VERSIONED_FILENAME: &'static str = if cfg!(target_os = "linux") { + const_format::concatcp!( + "lib", + Onnxruntime::LIB_NAME, + ".so.", + Onnxruntime::LIB_VERSION, + ) + } else if cfg!(any(target_os = "macos", target_os = "ios")) { + const_format::concatcp!( + "lib", + Onnxruntime::LIB_NAME, + ".", + Onnxruntime::LIB_VERSION, + ".dylib", + ) + } else { + Self::LIB_UNVERSIONED_FILENAME + }; + + /// [`LIB_NAME`]からなる動的ライブラリのファイル名。 + /// + /// [`LIB_NAME`]: Self::LIB_NAME + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub const LIB_UNVERSIONED_FILENAME: &'static str = const_format::concatcp!( + std::env::consts::DLL_PREFIX, + Onnxruntime::LIB_NAME, + std::env::consts::DLL_SUFFIX, + ); + + #[ref_cast_custom] + const fn new(inner: &EnvHandle) -> &Self; + + /// インスタンスが既に作られているならそれを得る。 + /// + /// 作られていなければ`None`を返す。 + pub fn get() -> Option<&'static Self> { + EnvHandle::get().map(Self::new) + } + + fn once( + init: impl FnOnce() -> anyhow::Result<&'static EnvHandle>, + ) -> crate::Result<&'static Self> { + let inner = init().map_err(|source| ErrorRepr::InitInferenceRuntime { + runtime_display_name: "ONNX Runtime", + source, + })?; + Ok(Self::new(inner)) + } + + /// ONNX Runtimeをロードして初期化する。 + /// + /// 一度成功したら、以後は引数を無視して同じ参照を返す。 + 
#[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub fn load_once() -> LoadOnce { + LoadOnce::default() + } + + /// ONNX Runtimeを初期化する。 + /// + /// 一度成功したら以後は同じ参照を返す。 + #[cfg(feature = "link-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "link-onnxruntime")))] + pub fn init_once() -> crate::Result<&'static Self> { + Self::once(|| ort::try_init(None)) + } + + #[cfg(test)] + pub(crate) fn from_test_util_data() -> anyhow::Result<&'static Self> { + #[cfg(feature = "load-onnxruntime")] + { + Self::load_once() + .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + .exec() + .map_err(Into::into) + } + + #[cfg(feature = "link-onnxruntime")] + { + Self::init_once().map_err(Into::into) + } + } + + /// このライブラリで利用可能なデバイスの情報を取得する。 + pub fn supported_devices(&self) -> crate::Result { + ::supported_devices(self) + } + } + + /// [`Onnxruntime::load_once`]のビルダー。 + #[cfg(feature = "load-onnxruntime")] + pub struct LoadOnce { + filename: std::ffi::OsString, + } + + #[cfg(feature = "load-onnxruntime")] + impl Default for LoadOnce { + fn default() -> Self { + let filename = Onnxruntime::LIB_VERSIONED_FILENAME.into(); + Self { filename } + } + } + + #[cfg(feature = "load-onnxruntime")] + impl LoadOnce { + /// ONNX Runtimeのファイル名(モジュール名)もしくはファイルパスを指定する。 + /// + /// `dlopen`/[`LoadLibraryExW`]の引数に使われる。デフォルト + /// は[`Onnxruntime::LIB_VERSIONED_FILENAME`]。 + /// + /// [`LoadLibraryExW`]: + /// https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexw + pub fn filename(mut self, filename: impl Into) -> Self { + self.filename = filename.into(); + self + } + + /// 実行する。 + pub fn exec(self) -> crate::Result<&'static Onnxruntime> { + Onnxruntime::once(|| ort::try_init_from(&self.filename, None)) + } + } +} + +pub(crate) mod tokio { + use ref_cast::{ref_cast_custom, RefCastCustom}; + + use crate::SupportedDevices; + + /// ONNX Runtime。 + /// + /// シングルトンであり、インスタンスは高々一つ。 + /// + /// # Rust APIにおけるインスタンスの共有 + 
/// + /// インスタンスは[voicevox-ort]側に作られる。Rustのクレートとしてこのライブラリを利用する場合、 + /// ブロッキング版APIやvoicevox-ortを利用する他クレートともインスタンスが共有される。 + /// + #[cfg_attr(feature = "load-onnxruntime", doc = "```")] + #[cfg_attr(not(feature = "load-onnxruntime"), doc = "```compile_fail")] + /// # use voicevox_core as another_lib; + /// # + /// # #[tokio::main] + /// # async fn main() -> anyhow::Result<()> { + /// # if cfg!(windows) { + /// # // Windows\System32\onnxruntime.dllを回避 + /// # voicevox_core::blocking::Onnxruntime::load_once() + /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + /// # .exec()?; + /// # } + /// let ort1 = voicevox_core::tokio::Onnxruntime::load_once().exec().await?; + /// let ort2 = another_lib::blocking::Onnxruntime::get().expect("`ort1`と同一のはず"); + /// assert_eq!(ptr_addr(ort1), ptr_addr(ort2)); + /// + /// fn ptr_addr(obj: &impl Sized) -> usize { + /// obj as *const _ as _ + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// [voicevox-ort]: https://github.com/VOICEVOX/ort + #[derive(Debug, RefCastCustom)] + #[repr(transparent)] + pub struct Onnxruntime(pub(crate) super::blocking::Onnxruntime); + + impl Onnxruntime { + /// ONNX Runtimeのライブラリ名。 + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + // ブロッキング版と等しいことはテストで担保 + pub const LIB_NAME: &'static str = "onnxruntime"; + + /// 推奨されるONNX Runtimeのバージョン。 + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + // ブロッキング版と等しいことはテストで担保 + pub const LIB_VERSION: &'static str = ort::downloaded_version!(); + + /// [`LIB_NAME`]と[`LIB_VERSION`]からなる動的ライブラリのファイル名。 + /// + /// WindowsとAndroidでは[`LIB_UNVERSIONED_FILENAME`]と同じ。 + /// + /// [`LIB_NAME`]: Self::LIB_NAME + /// [`LIB_VERSION`]: Self::LIB_VERSION + /// [`LIB_UNVERSIONED_FILENAME`]: Self::LIB_UNVERSIONED_FILENAME + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub const LIB_VERSIONED_FILENAME: &'static str = + 
super::blocking::Onnxruntime::LIB_VERSIONED_FILENAME; + + /// [`LIB_NAME`]からなる動的ライブラリのファイル名。 + /// + /// [`LIB_NAME`]: Self::LIB_NAME + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub const LIB_UNVERSIONED_FILENAME: &'static str = + super::blocking::Onnxruntime::LIB_UNVERSIONED_FILENAME; + + #[ref_cast_custom] + pub(crate) const fn from_blocking(blocking: &super::blocking::Onnxruntime) -> &Self; + + /// インスタンスが既に作られているならそれを得る。 + /// + /// 作られていなければ`None`を返す。 + pub fn get() -> Option<&'static Self> { + super::blocking::Onnxruntime::get().map(Self::from_blocking) + } + + /// ONNX Runtimeをロードして初期化する。 + /// + /// 一度成功したら、以後は引数を無視して同じ参照を返す。 + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub fn load_once() -> LoadOnce { + LoadOnce::default() + } + + /// ONNX Runtimeを初期化する。 + /// + /// 一度成功したら以後は同じ参照を返す。 + #[cfg(feature = "link-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "link-onnxruntime")))] + pub async fn init_once() -> crate::Result<&'static Self> { + let inner = crate::task::asyncify(super::blocking::Onnxruntime::init_once).await?; + Ok(Self::from_blocking(inner)) + } + + #[cfg(test)] + pub(crate) async fn from_test_util_data() -> anyhow::Result<&'static Self> { + crate::task::asyncify(super::blocking::Onnxruntime::from_test_util_data) + .await + .map(Self::from_blocking) + } + + /// このライブラリで利用可能なデバイスの情報を取得する。 + pub fn supported_devices(&self) -> crate::Result { + self.0.supported_devices() + } + } + + /// [`Onnxruntime::load_once`]のビルダー。 + #[cfg(feature = "load-onnxruntime")] + #[derive(Default)] + pub struct LoadOnce(super::blocking::LoadOnce); + + #[cfg(feature = "load-onnxruntime")] + impl LoadOnce { + /// ONNX Runtimeのファイル名(モジュール名)もしくはファイルパスを指定する。 + /// + /// `dlopen`/[`LoadLibraryExW`]の引数に使われる。デフォルト + /// は[`Onnxruntime::LIB_VERSIONED_FILENAME`]。 + /// + /// [`LoadLibraryExW`]: + /// 
https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexw + pub fn filename(self, filename: impl Into) -> Self { + Self(self.0.filename(filename)) + } + + /// 実行する。 + pub async fn exec(self) -> crate::Result<&'static Onnxruntime> { + let inner = crate::task::asyncify(|| self.0.exec()).await?; + Ok(Onnxruntime::from_blocking(inner)) + } + } +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + + #[cfg(feature = "load-onnxruntime")] + #[test] + fn assert_same_lib_names_and_versions() { + use pretty_assertions::assert_eq; + + assert_eq!( + super::blocking::Onnxruntime::LIB_NAME, + super::tokio::Onnxruntime::LIB_NAME, + ); + assert_eq!( + super::blocking::Onnxruntime::LIB_VERSION, + super::tokio::Onnxruntime::LIB_VERSION, + ); + } + + #[rstest] + fn supported_devices_works() { + let result = super::blocking::Onnxruntime::from_test_util_data() + .and_then(|o| o.supported_devices().map_err(Into::into)); + // 環境によって結果が変わるので、関数呼び出しが成功するかどうかの確認のみ行う + assert!(result.is_ok(), "{result:?}"); + } +} diff --git a/crates/voicevox_core/src/infer/session_set.rs b/crates/voicevox_core/src/infer/session_set.rs index cdd179680..95f081bac 100644 --- a/crates/voicevox_core/src/infer/session_set.rs +++ b/crates/voicevox_core/src/infer/session_set.rs @@ -17,6 +17,7 @@ pub(crate) struct InferenceSessionSet( impl InferenceSessionSet { pub(crate) fn new( + rt: &R, model_bytes: &EnumMap>, options: &EnumMap, ) -> anyhow::Result { @@ -27,7 +28,7 @@ impl InferenceSessionSet { ::PARAM_INFOS[op]; let (sess, actual_input_param_infos, actual_output_param_infos) = - R::new_session(|| model_file::decrypt(model_bytes), options[op])?; + rt.new_session(|| model_file::decrypt(model_bytes), options[op])?; check_param_infos(expected_input_param_infos, &actual_input_param_infos)?; check_param_infos(expected_output_param_infos, &actual_output_param_infos)?; diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs index 0f34c5962..910f9eeef 100644 
--- a/crates/voicevox_core/src/lib.rs +++ b/crates/voicevox_core/src/lib.rs @@ -1,4 +1,66 @@ //! 無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのコア。 +//! +//! # Feature flags +//! +//! ## ONNX Runtimeのリンク方法を決めるフィーチャ +//! +//! このクレートの利用にあたっては以下の二つの[Cargoフィーチャ]のうちどちらかを有効にしなければなり +//! ません。両方の有効化はコンパイルエラーとなります。[`Onnxruntime`]の初期化方法はこれらの +//! フィーチャによって決まります。 +//! +//! - **`load-onnxruntime`**: ONNX Runtimeを`dlopen`/`LoadLibraryExW`で開きます。 +//! - **`link-onnxruntime`**: ONNX Runtimeをロード時動的リンクします。iOSのような`dlopen`の利用が +//! 困難な環境でのみこちらを利用するべきです。_Note_: +//! [動的リンク対象のライブラリ名]は`onnxruntime`で固定です。変更 +//! は`patchelf(1)`や`install_name_tool(1)`で行ってください。 +//! +//! ## GPUを利用可能にするフィーチャ +//! +//! - **`cuda`** +//! - **`directml`** +// TODO: こんな感じ(↓)で書く +////! - **`cuda`**: [CUDAを用いた機械学習推論]を可能にします。 +////! - ❗ [acceleration\_mode]={Gpu,Auto}のときの挙動が変化します。`directml`と共に +////! 有効化したときの挙動は未規定です。 +////! - **`directml`**: [DirectMLを用いた機械学習推論]を可能にします。 +////! - ❗ 〃 +////! +////! [CUDAを用いた機械学習推論]: +////! https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html +////! [DirectMLを用いた機械学習推論]: +////! https://onnxruntime.ai/docs/execution-providers/DirectML-ExecutionProvider.html +////! [acceleration\_mode]: InitializeOptions::acceleration_mode +//! +//! [Cargoフィーチャ]: https://doc.rust-lang.org/stable/cargo/reference/features.html +//! [動的リンク対象のライブラリ名]: +//! https://doc.rust-lang.org/cargo/reference/build-scripts.html#rustc-link-lib +//! 
[`Onnxruntime`]: blocking::Onnxruntime + +#![cfg_attr(docsrs, feature(doc_cfg))] + +#[cfg(not(any(feature = "load-onnxruntime", feature = "link-onnxruntime")))] +compile_error!("either `load-onnxruntime` or `link-onnxruntime` must be enabled"); + +#[cfg(not(doc))] +const _: () = { + #[cfg(all(feature = "load-onnxruntime", feature = "link-onnxruntime"))] + compile_error!("`load-onnxruntime` and `link-onnxruntime` cannot be enabled at the same time"); + + // Rust APIでvoicevox-ortを他のクレートが利用する可能性を考え、voicevox-ort側とfeatureがズレ + // ないようにする + + #[cfg(feature = "load-onnxruntime")] + ort::assert_feature!( + cfg(feature = "load-dynamic"), + "when `load-onnxruntime` is enabled,`voicevox-ort/load-dynamic` must be also enabled", + ); + + #[cfg(feature = "link-onnxruntime")] + ort::assert_feature!( + cfg(not(feature = "load-dynamic")), + "when `link-onnxruntime` is enabled,`voicevox-ort/load-dynamic` must be disabled", + ); +}; mod devices; /// cbindgen:ignore diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs index f590e18f4..475031f1c 100644 --- a/crates/voicevox_core/src/status.rs +++ b/crates/voicevox_core/src/status.rs @@ -21,13 +21,18 @@ use crate::{ }; pub(crate) struct Status { + pub(crate) rt: &'static R, loaded_models: std::sync::Mutex>, session_options: InferenceDomainMap, } impl Status { - pub(crate) fn new(session_options: InferenceDomainMap) -> Self { + pub(crate) fn new( + rt: &'static R, + session_options: InferenceDomainMap, + ) -> Self { Self { + rt, loaded_models: Default::default(), session_options, } @@ -44,7 +49,7 @@ impl Status { .ensure_acceptable(model_header)?; let session_sets_with_inner_ids = model_contents - .create_session_sets(&self.session_options) + .create_session_sets(self.rt, &self.session_options) .map_err(|source| LoadModelError { path: model_header.path.clone(), context: LoadModelErrorKind::InvalidModelData, @@ -310,6 +315,7 @@ impl InferenceDomainMap { fn create_session_sets( &self, + rt: &R, 
session_options: &InferenceDomainMap, ) -> anyhow::Result>> { duplicate! { @@ -321,7 +327,7 @@ impl InferenceDomainMap { .field .as_ref() .map(|(inner_voice_ids, model_bytes)| { - let session_set = InferenceSessionSet::new(model_bytes, &session_options.field)?; + let session_set = InferenceSessionSet::new(rt, model_bytes, &session_options.field)?; Ok::<_, anyhow::Error>((inner_voice_ids.clone(), session_set)) }) .transpose()?; @@ -348,7 +354,6 @@ mod tests { InferenceSessionOptions, }, macros::tests::assert_debug_fmt_eq, - synthesizer::InferenceRuntimeImpl, }; use super::Status; @@ -371,7 +376,10 @@ mod tests { TalkOperation::Decode => heavy_session_options, }, }; - let status = Status::::new(session_options); + let status = Status::new( + crate::blocking::Onnxruntime::from_test_util_data().unwrap(), + session_options, + ); assert_eq!( light_session_options, @@ -392,9 +400,12 @@ mod tests { #[rstest] #[tokio::test] async fn status_load_model_works() { - let status = Status::::new(InferenceDomainMap { - talk: enum_map!(_ => InferenceSessionOptions::new(0, false)), - }); + let status = Status::new( + crate::blocking::Onnxruntime::from_test_util_data().unwrap(), + InferenceDomainMap { + talk: enum_map!(_ => InferenceSessionOptions::new(0, false)), + }, + ); let model = &crate::tokio::VoiceModel::sample().await.unwrap(); let model_contents = &model.read_inference_models().await.unwrap(); let result = status.insert_model(model.header(), model_contents); @@ -405,9 +416,12 @@ mod tests { #[rstest] #[tokio::test] async fn status_is_model_loaded_works() { - let status = Status::::new(InferenceDomainMap { - talk: enum_map!(_ => InferenceSessionOptions::new(0, false)), - }); + let status = Status::new( + crate::blocking::Onnxruntime::from_test_util_data().unwrap(), + InferenceDomainMap { + talk: enum_map!(_ => InferenceSessionOptions::new(0, false)), + }, + ); let vvm = &crate::tokio::VoiceModel::sample().await.unwrap(); let model_header = vvm.header(); let model_contents = 
&vvm.read_inference_models().await.unwrap(); diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index 4b26eb56b..767d27b1c 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -1,5 +1,3 @@ -use crate::infer::runtimes::Onnxruntime; - /// [`blocking::Synthesizer::synthesis`]および[`tokio::Synthesizer::synthesis`]のオプション。 /// /// [`blocking::Synthesizer::synthesis`]: blocking::Synthesizer::synthesis @@ -68,8 +66,6 @@ pub struct InitializeOptions { pub cpu_num_threads: u16, } -pub(crate) type InferenceRuntimeImpl = Onnxruntime; - pub(crate) mod blocking { // FIXME: ここのdocのコードブロックはasync版のものなので、`tokio`モジュールの方に移した上で、 // (ブロッキング版をpublic APIにするならの話ではあるが)ブロッキング版はブロッキング版でコード例 @@ -93,16 +89,16 @@ pub(crate) mod blocking { status::Status, text_analyzer::{KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer}, AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId, - SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta, + SynthesisOptions, VoiceModelId, VoiceModelMeta, }; - use super::{AccelerationMode, InferenceRuntimeImpl, InitializeOptions, TtsOptions}; + use super::{AccelerationMode, InitializeOptions, TtsOptions}; const DEFAULT_SAMPLING_RATE: u32 = 24000; /// 音声シンセサイザ。 pub struct Synthesizer { - pub(super) status: Status, + pub(super) status: Status, open_jtalk_analyzer: OpenJTalkAnalyzer, kana_analyzer: KanaAnalyzer, use_gpu: bool, @@ -113,22 +109,29 @@ pub(crate) mod blocking { /// /// # Example /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] + #[cfg_attr(feature = "load-onnxruntime", doc = "```")] + #[cfg_attr(not(feature = "load-onnxruntime"), doc = "```compile_fail")] /// # #[tokio::main] /// # async fn main() -> anyhow::Result<()> { - /// # use test_util::OPEN_JTALK_DIC_DIR; + /// # use test_util::{ONNXRUNTIME_DYLIB_PATH, OPEN_JTALK_DIC_DIR}; /// # /// # const 
ACCELERATION_MODE: AccelerationMode = AccelerationMode::Cpu; /// # /// use std::sync::Arc; /// /// use voicevox_core::{ - /// tokio::{OpenJtalk, Synthesizer}, + /// tokio::{Onnxruntime, OpenJtalk, Synthesizer}, /// AccelerationMode, InitializeOptions, /// }; /// + /// # if cfg!(windows) { + /// # // Windows\System32\onnxruntime.dllを回避 + /// # voicevox_core::blocking::Onnxruntime::load_once() + /// # .filename(test_util::ONNXRUNTIME_DYLIB_PATH) + /// # .exec()?; + /// # } /// let mut syntesizer = Synthesizer::new( + /// Onnxruntime::load_once().exec().await?, /// Arc::new(OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap()), /// &InitializeOptions { /// acceleration_mode: ACCELERATION_MODE, @@ -139,13 +142,17 @@ pub(crate) mod blocking { /// # Ok(()) /// # } /// ``` - pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { + pub fn new( + onnxruntime: &'static crate::blocking::Onnxruntime, + open_jtalk: O, + options: &InitializeOptions, + ) -> Result { #[cfg(windows)] list_windows_video_cards(); let use_gpu = match options.acceleration_mode { AccelerationMode::Auto => { - let supported_devices = SupportedDevices::create()?; + let supported_devices = onnxruntime.supported_devices()?; if cfg!(feature = "directml") { *supported_devices.dml() @@ -157,7 +164,7 @@ pub(crate) mod blocking { AccelerationMode::Gpu => true, }; - if use_gpu && !can_support_gpu_feature()? { + if use_gpu && !can_support_gpu_feature(onnxruntime)? { return Err(ErrorRepr::GpuSupport.into()); } @@ -169,13 +176,16 @@ pub(crate) mod blocking { let heavy_session_options = InferenceSessionOptions::new(options.cpu_num_threads, use_gpu); - let status = Status::new(InferenceDomainMap { - talk: enum_map! { - TalkOperation::PredictDuration - | TalkOperation::PredictIntonation => light_session_options, - TalkOperation::Decode => heavy_session_options, + let status = Status::new( + onnxruntime, + InferenceDomainMap { + talk: enum_map! 
{ + TalkOperation::PredictDuration + | TalkOperation::PredictIntonation => light_session_options, + TalkOperation::Decode => heavy_session_options, + }, }, - }); + ); return Ok(Self { status, @@ -184,8 +194,8 @@ pub(crate) mod blocking { use_gpu, }); - fn can_support_gpu_feature() -> Result { - let supported_devices = SupportedDevices::create()?; + fn can_support_gpu_feature(onnxruntime: &crate::blocking::Onnxruntime) -> Result { + let supported_devices = onnxruntime.supported_devices()?; if cfg!(feature = "directml") { Ok(*supported_devices.dml()) @@ -195,6 +205,10 @@ pub(crate) mod blocking { } } + pub fn onnxruntime(&self) -> &'static crate::blocking::Onnxruntime { + self.status.rt + } + /// ハードウェアアクセラレーションがGPUモードか判定する。 pub fn is_gpu_mode(&self) -> bool { self.use_gpu @@ -437,13 +451,13 @@ pub(crate) mod blocking { /// /// # Example /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] + /// ``` /// # #[tokio::main] /// # async fn main() -> anyhow::Result<()> { /// # let synthesizer = /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, /// # test_util::OPEN_JTALK_DIC_DIR, /// # ) /// # .await?; @@ -677,13 +691,13 @@ pub(crate) mod blocking { /// /// # Example /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] + /// ``` /// # #[tokio::main] /// # async fn main() -> anyhow::Result<()> { /// # let synthesizer = /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, /// # test_util::OPEN_JTALK_DIC_DIR, /// # ) /// # .await?; @@ -726,13 +740,13 @@ pub(crate) mod blocking { /// /// # Example /// - #[cfg_attr(windows, doc = "```no_run")] // 
https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] + /// ``` /// # #[tokio::main] /// # async fn main() -> anyhow::Result<()> { /// # let synthesizer = /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, /// # test_util::OPEN_JTALK_DIC_DIR, /// # ) /// # .await?; @@ -759,13 +773,13 @@ pub(crate) mod blocking { /// /// # Examples /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] + /// ``` /// # #[tokio::main] /// # async fn main() -> anyhow::Result<()> { /// # let synthesizer = /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( /// # test_util::SAMPLE_VOICE_MODEL_FILE_PATH, + /// # test_util::ONNXRUNTIME_DYLIB_PATH, /// # test_util::OPEN_JTALK_DIC_DIR, /// # ) /// # .await?; @@ -1135,12 +1149,20 @@ pub(crate) mod tokio { // FIXME: docを書く impl self::Synthesizer { - pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { - super::blocking::Synthesizer::new(open_jtalk, options) + pub fn new( + onnxruntime: &'static crate::tokio::Onnxruntime, + open_jtalk: O, + options: &InitializeOptions, + ) -> Result { + super::blocking::Synthesizer::new(&onnxruntime.0, open_jtalk, options) .map(Into::into) .map(Self) } + pub fn onnxruntime(&self) -> &'static crate::tokio::Onnxruntime { + crate::tokio::Onnxruntime::from_blocking(self.0.onnxruntime()) + } + pub fn is_gpu_mode(&self) -> bool { self.0.is_gpu_mode() } @@ -1305,6 +1327,9 @@ mod tests { #[tokio::test] async fn load_model_works(#[case] expected_result_at_initialized: Result<()>) { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1328,6 +1353,9 @@ mod tests { #[tokio::test] async 
fn is_use_gpu_works() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1344,6 +1372,9 @@ mod tests { async fn is_loaded_model_by_style_id_works(#[case] style_id: u32, #[case] expected: bool) { let style_id = StyleId::new(style_id); let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1372,6 +1403,9 @@ mod tests { #[tokio::test] async fn predict_duration_works() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1403,6 +1437,9 @@ mod tests { #[tokio::test] async fn predict_intonation_works() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1442,6 +1479,9 @@ mod tests { #[tokio::test] async fn decode_works() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1534,6 +1574,9 @@ mod tests { #[case] expected_kana_text: &str, ) { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) .await .unwrap(), @@ -1604,6 +1647,9 @@ mod tests { #[case] expected_text_consonant_vowel_data: &TextConsonantVowelData, ) { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) .await .unwrap(), @@ -1671,6 +1717,9 @@ mod tests { #[tokio::test] async fn 
create_accent_phrases_works_for_japanese_commas_and_periods() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) .await .unwrap(), @@ -1732,6 +1781,9 @@ mod tests { #[tokio::test] async fn mora_length_works() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) .await .unwrap(), @@ -1770,6 +1822,9 @@ mod tests { #[tokio::test] async fn mora_pitch_works() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) .await .unwrap(), @@ -1804,6 +1859,9 @@ mod tests { #[tokio::test] async fn mora_data_works() { let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::Onnxruntime::from_test_util_data() + .await + .unwrap(), crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) .await .unwrap(), diff --git a/crates/voicevox_core/src/tokio.rs b/crates/voicevox_core/src/tokio.rs index 49451a310..1e2fabada 100644 --- a/crates/voicevox_core/src/tokio.rs +++ b/crates/voicevox_core/src/tokio.rs @@ -1,6 +1,13 @@ //! 
Tokio版API。 pub use crate::{ - engine::open_jtalk::tokio::OpenJtalk, synthesizer::tokio::Synthesizer, - user_dict::dict::tokio::UserDict, voice_model::tokio::VoiceModel, + engine::open_jtalk::tokio::OpenJtalk, infer::runtimes::onnxruntime::tokio::Onnxruntime, + synthesizer::tokio::Synthesizer, user_dict::dict::tokio::UserDict, + voice_model::tokio::VoiceModel, }; + +pub mod onnxruntime { + #[cfg(feature = "load-onnxruntime")] + #[cfg_attr(docsrs, doc(cfg(feature = "load-onnxruntime")))] + pub use crate::infer::runtimes::onnxruntime::tokio::LoadOnce; +} diff --git a/crates/voicevox_core_c_api/Cargo.toml b/crates/voicevox_core_c_api/Cargo.toml index f10e2a6f4..29b66e55a 100644 --- a/crates/voicevox_core_c_api/Cargo.toml +++ b/crates/voicevox_core_c_api/Cargo.toml @@ -13,6 +13,8 @@ harness = false name = "e2e" [features] +load-onnxruntime = ["voicevox_core/load-onnxruntime"] +link-onnxruntime = ["voicevox_core/link-onnxruntime"] cuda = ["voicevox_core/cuda"] directml = ["voicevox_core/directml"] @@ -22,6 +24,7 @@ anstyle-query.workspace = true camino.workspace = true chrono = { workspace = true, default-features = false, features = ["clock"] } colorchoice.workspace = true +const_format.workspace = true cstr.workspace = true derive-getters.workspace = true easy-ext.workspace = true @@ -30,6 +33,7 @@ itertools.workspace = true libc.workspace = true once_cell.workspace = true process_path.workspace = true +ref-cast.workspace = true serde_json = { workspace = true, features = ["preserve_order"] } thiserror.workspace = true tracing.workspace = true @@ -55,6 +59,7 @@ tempfile.workspace = true test_util.workspace = true toml.workspace = true typetag.workspace = true +voicevox-ort.workspace = true [lints.rust] unsafe_code = "allow" # C APIのための操作 diff --git a/crates/voicevox_core_c_api/build.rs b/crates/voicevox_core_c_api/build.rs index 535e73676..6b0934882 100644 --- a/crates/voicevox_core_c_api/build.rs +++ b/crates/voicevox_core_c_api/build.rs @@ -1,3 +1,4 @@ +// TODO: 
voicevox_onnxruntimeになったらやめる fn main() { #[cfg(target_os = "linux")] println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN"); diff --git a/crates/voicevox_core_c_api/cbindgen.toml b/crates/voicevox_core_c_api/cbindgen.toml index 7615280f6..c377daa20 100644 --- a/crates/voicevox_core_c_api/cbindgen.toml +++ b/crates/voicevox_core_c_api/cbindgen.toml @@ -9,6 +9,19 @@ header = """ * 無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのコア。 * *
+ *
+ * Availability + *
+ * + *
+ * ヘッダによって次の二つのマクロのうちどちらかが存在する。[リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSでのみ`VOICEVOX_LINK_ONNXRUNTIME`が、他のプラットフォームでは`VOICEVOX_LOAD_ONNXRUNTIME`が存在する。 + * + * - `VOICEVOX_LOAD_ONNXRUNTIME`: ::voicevox_onnxruntime_load_once と、それに付属するアイテムが利用可能になる。 + * - `VOICEVOX_LINK_ONNXRUNTIME`: ::voicevox_onnxruntime_init_once が利用可能になる。またこのマクロが存在するなら、このライブラリはONNX Runtimeをロード時動的リンクする。 + *
+ *
+ * + *
*
* ⚠️ Safety *
@@ -55,7 +68,18 @@ after_includes = """ #else // __cplusplus #include #include -#endif // __cplusplus""" +#endif // __cplusplus + +//#define VOICEVOX_LINK_ONNXRUNTIME +//#define VOICEVOX_LOAD_ONNXRUNTIME + +#if !(defined(VOICEVOX_LINK_ONNXRUNTIME) || defined(VOICEVOX_LOAD_ONNXRUNTIME)) +#error "either `VOICEVOX_LINK_ONNXRUNTIME` or `VOICEVOX_LOAD_ONNXRUNTIME` must be enabled" +#endif + +#if defined(VOICEVOX_LINK_ONNXRUNTIME) && defined(VOICEVOX_LOAD_ONNXRUNTIME) +#error "`VOICEVOX_LINK_ONNXRUNTIME` or `VOICEVOX_LOAD_ONNXRUNTIME` cannot be enabled at the same time" +#endif""" # Code Style Options @@ -78,3 +102,7 @@ rename_variants = "ScreamingSnakeCase" [parse] parse_deps = true include = ["voicevox_core"] + +[defines] +"feature = load-onnxruntime" = "VOICEVOX_LOAD_ONNXRUNTIME" +"feature = link-onnxruntime" = "VOICEVOX_LINK_ONNXRUNTIME" diff --git a/crates/voicevox_core_c_api/include/voicevox_core.h b/crates/voicevox_core_c_api/include/voicevox_core.h index da921c869..fe19a4c2e 100644 --- a/crates/voicevox_core_c_api/include/voicevox_core.h +++ b/crates/voicevox_core_c_api/include/voicevox_core.h @@ -4,6 +4,19 @@ * 無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのコア。 * *
+ *
+ * Availability + *
+ * + *
+ * ヘッダによって次の二つのマクロのうちどちらかが存在する。[リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSでのみ`VOICEVOX_LINK_ONNXRUNTIME`が、他のプラットフォームでは`VOICEVOX_LOAD_ONNXRUNTIME`が存在する。 + * + * - `VOICEVOX_LOAD_ONNXRUNTIME`: ::voicevox_onnxruntime_load_once と、それに付属するアイテムが利用可能になる。 + * - `VOICEVOX_LINK_ONNXRUNTIME`: ::voicevox_onnxruntime_init_once が利用可能になる。またこのマクロが存在するなら、このライブラリはONNX Runtimeをロード時動的リンクする。 + *
+ *
+ * + *
*
* ⚠️ Safety *
@@ -53,6 +66,17 @@ #include #endif // __cplusplus +//#define VOICEVOX_LINK_ONNXRUNTIME +//#define VOICEVOX_LOAD_ONNXRUNTIME + +#if !(defined(VOICEVOX_LINK_ONNXRUNTIME) || defined(VOICEVOX_LOAD_ONNXRUNTIME)) +#error "either `VOICEVOX_LINK_ONNXRUNTIME` or `VOICEVOX_LOAD_ONNXRUNTIME` must be enabled" +#endif + +#if defined(VOICEVOX_LINK_ONNXRUNTIME) && defined(VOICEVOX_LOAD_ONNXRUNTIME) +#error "`VOICEVOX_LINK_ONNXRUNTIME` or `VOICEVOX_LOAD_ONNXRUNTIME` cannot be enabled at the same time" +#endif + /** * ハードウェアアクセラレーションモードを設定する設定値。 */ @@ -102,6 +126,10 @@ enum VoicevoxResultCode * GPUモードがサポートされていない */ VOICEVOX_RESULT_GPU_SUPPORT_ERROR = 4, + /** + * 推論ライブラリのロードまたは初期化ができなかった + */ + VOICEVOX_RESULT_INIT_INFERENCE_RUNTIME_ERROR = 29, /** * スタイルIDに対するスタイルが見つからなかった */ @@ -239,6 +267,21 @@ typedef int32_t VoicevoxUserDictWordType; */ typedef struct OpenJtalkRc OpenJtalkRc; +/** + * ONNX Runtime。 + * + * シングルトンであり、インスタンスは高々一つ。 + * + * ```c + * const VoicevoxOnnxruntime *ort1; + * voicevox_onnxruntime_load_once(voicevox_make_default_load_onnxruntime_options, + * &ort1); + * const VoicevoxOnnxruntime *ort2 = voicevox_onnxruntime_get(); + * assert(ort1 == ort2); + * ``` + */ +typedef struct VoicevoxOnnxruntime VoicevoxOnnxruntime; + /** * 音声シンセサイザ。 * @@ -259,6 +302,24 @@ typedef struct VoicevoxUserDict VoicevoxUserDict; */ typedef struct VoicevoxVoiceModel VoicevoxVoiceModel; +#if defined(VOICEVOX_LOAD_ONNXRUNTIME) +/** + * ::voicevox_onnxruntime_load_once のオプション。 + * + * \availability{ + * [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 + * } + */ +typedef struct VoicevoxLoadOnnxruntimeOptions { + /** + * ONNX Runtimeのファイル名(モジュール名)もしくはファイルパスを指定する。 + * + * `dlopen`/[`LoadLibraryExW`](https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexw)の引数に使われる。デフォルトは ::voicevox_get_onnxruntime_lib_versioned_filename と同じ。 + */ + const char *filename; +} 
VoicevoxLoadOnnxruntimeOptions; +#endif + /** * ::voicevox_synthesizer_new のオプション。 */ @@ -336,6 +397,115 @@ typedef struct VoicevoxUserDictWord { extern "C" { #endif // __cplusplus +#if defined(VOICEVOX_LOAD_ONNXRUNTIME) +/** + * ONNX Runtimeの動的ライブラリの、バージョン付きのファイル名。 + * + * WindowsとAndroidでは ::voicevox_get_onnxruntime_lib_unversioned_filename と同じ。 + * + * \availability{ + * [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 + * } + */ +#ifdef _WIN32 +__declspec(dllimport) +#endif +const char *voicevox_get_onnxruntime_lib_versioned_filename(void); +#endif + +#if defined(VOICEVOX_LOAD_ONNXRUNTIME) +/** + * ONNX Runtimeの動的ライブラリの、バージョン無しのファイル名。 + * + * \availability{ + * [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 + * } + */ +#ifdef _WIN32 +__declspec(dllimport) +#endif +const char *voicevox_get_onnxruntime_lib_unversioned_filename(void); +#endif + +#if defined(VOICEVOX_LOAD_ONNXRUNTIME) +/** + * デフォルトの ::voicevox_onnxruntime_load_once のオプションを生成する。 + * + * @return デフォルトの ::voicevox_onnxruntime_load_once のオプション + * + * \availability{ + * [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 + * } + */ +#ifdef _WIN32 +__declspec(dllimport) +#endif +struct VoicevoxLoadOnnxruntimeOptions voicevox_make_default_load_onnxruntime_options(void); +#endif + +/** + * ::VoicevoxOnnxruntime のインスタンスが既に作られているならそれを得る。 + * + * 作られていなければ`NULL`を返す。 + * + * @returns ::VoicevoxOnnxruntime のインスタンス + */ +#ifdef _WIN32 +__declspec(dllimport) +#endif +const struct VoicevoxOnnxruntime *voicevox_onnxruntime_get(void); + +#if defined(VOICEVOX_LOAD_ONNXRUNTIME) +/** + * ONNX Runtimeをロードして初期化する。 + * + * 一度成功したら、以後は引数を無視して同じ参照を返す。 + * + * @param [in] options オプション + * @param [out] out_onnxruntime ::VoicevoxOnnxruntime のインスタンス + * + * @returns 結果コード + * + * \availability{ + * 
[リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 + * } + * + * \safety{ + * - `options.filename`はヌル終端文字列を指し、かつ読み込みについて有効でなければならない。 + * - `out_onnxruntime`は書き込みについて有効でなければならない。 + * } + */ +#ifdef _WIN32 +__declspec(dllimport) +#endif +VoicevoxResultCode voicevox_onnxruntime_load_once(struct VoicevoxLoadOnnxruntimeOptions options, + const struct VoicevoxOnnxruntime **out_onnxruntime); +#endif + +#if defined(VOICEVOX_LINK_ONNXRUNTIME) +/** + * ONNX Runtimeを初期化する。 + * + * 一度成功したら以後は同じ参照を返す。 + * + * @param [out] out_onnxruntime ::VoicevoxOnnxruntime のインスタンス + * + * @returns 結果コード + * + * \availability{ + * [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSでのみ利用可能。詳細はファイルレベルの"Availability"の節を参照。 + * } + * + * \safety{ + * - `out_onnxruntime`は書き込みについて有効でなければならない。 + * } + */ +#ifdef _WIN32 +__declspec(dllimport) +#endif +VoicevoxResultCode voicevox_onnxruntime_init_once(const struct VoicevoxOnnxruntime **out_onnxruntime); +#endif + /** * ::OpenJtalkRc を構築(_construct_)する。 * @@ -492,6 +662,7 @@ void voicevox_voice_model_delete(struct VoicevoxVoiceModel *model); /** * ::VoicevoxSynthesizer を構築(_construct_)する。 * + * @param [in] onnxruntime * @param [in] open_jtalk Open JTalkのオブジェクト * @param [in] options オプション * @param [out] out_synthesizer 構築先 @@ -499,6 +670,7 @@ void voicevox_voice_model_delete(struct VoicevoxVoiceModel *model); * @returns 結果コード * * \safety{ + * - `onnxruntime`は ::voicevox_onnxruntime_load_once または ::voicevox_onnxruntime_init_once で得たものでなければならない。 * - `open_jtalk`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_open_jtalk_rc_new で解放されていてはいけない。 * - `out_synthesizer`は書き込みについて有効でなければならない。 * } @@ -506,7 +678,8 @@ void voicevox_voice_model_delete(struct VoicevoxVoiceModel *model); #ifdef _WIN32 __declspec(dllimport) #endif -VoicevoxResultCode voicevox_synthesizer_new(const struct OpenJtalkRc *open_jtalk, +VoicevoxResultCode 
voicevox_synthesizer_new(const struct VoicevoxOnnxruntime *onnxruntime, + const struct OpenJtalkRc *open_jtalk, struct VoicevoxInitializeOptions options, struct VoicevoxSynthesizer **out_synthesizer); @@ -563,6 +736,22 @@ __declspec(dllimport) VoicevoxResultCode voicevox_synthesizer_unload_voice_model(const struct VoicevoxSynthesizer *synthesizer, VoicevoxVoiceModelId model_id); +/** + * ::VoicevoxOnnxruntime のインスタンスを得る。 + * + * @param [in] synthesizer 音声シンセサイザ + * + * @returns ::VoicevoxOnnxruntime のインスタンス + * + * \safety{ + * - `synthesizer`は ::voicevox_synthesizer_new で得たものでなければならず、また ::voicevox_synthesizer_delete で解放されていてはいけない。 + * } + */ +#ifdef _WIN32 +__declspec(dllimport) +#endif +const struct VoicevoxOnnxruntime *voicevox_synthesizer_get_onnxruntime(const struct VoicevoxSynthesizer *synthesizer); + /** * ハードウェアアクセラレーションがGPUモードか判定する。 * @@ -623,6 +812,7 @@ char *voicevox_synthesizer_create_metas_json(const struct VoicevoxSynthesizer *s * * あくまで本ライブラリが対応しているデバイスの情報であることに注意。GPUが使える環境ではなかったとしても`cuda`や`dml`は`true`を示しうる。 * + * @param [in] onnxruntime * @param [out] output_supported_devices_json サポートデバイス情報のJSON文字列 * * @returns 結果コード @@ -630,18 +820,20 @@ char *voicevox_synthesizer_create_metas_json(const struct VoicevoxSynthesizer *s * \example{ * ```c * char *supported_devices; - * VoicevoxResultCode result = voicevox_create_supported_devices_json(&supported_devices); + * VoicevoxResultCode result = voicevox_onnxruntime_create_supported_devices_json(onnxruntime, &supported_devices); * ``` * } * * \safety{ + * - `onnxruntime`は ::voicevox_onnxruntime_load_once または ::voicevox_onnxruntime_init_once で得たものでなければならない。 * - `output_supported_devices_json`は書き込みについて有効でなければならない。 * } */ #ifdef _WIN32 __declspec(dllimport) #endif -VoicevoxResultCode voicevox_create_supported_devices_json(char **output_supported_devices_json); +VoicevoxResultCode voicevox_onnxruntime_create_supported_devices_json(const struct VoicevoxOnnxruntime *onnxruntime, + char 
**output_supported_devices_json); /** * AquesTalk風記法から、AudioQueryをJSONとして生成する。 @@ -980,7 +1172,7 @@ VoicevoxResultCode voicevox_synthesizer_tts(const struct VoicevoxSynthesizer *sy * * \safety{ * - `json`は以下のAPIで得られたポインタでなくてはいけない。 - * - ::voicevox_create_supported_devices_json + * - ::voicevox_onnxruntime_create_supported_devices_json * - ::voicevox_synthesizer_create_metas_json * - ::voicevox_synthesizer_create_audio_query * - ::voicevox_synthesizer_create_accent_phrases diff --git a/crates/voicevox_core_c_api/src/c_impls.rs b/crates/voicevox_core_c_api/src/c_impls.rs index fe4afcf65..1adc402cd 100644 --- a/crates/voicevox_core_c_api/src/c_impls.rs +++ b/crates/voicevox_core_c_api/src/c_impls.rs @@ -1,13 +1,64 @@ use std::{ffi::CString, path::Path}; use camino::Utf8Path; +use ref_cast::ref_cast_custom; use voicevox_core::{InitializeOptions, Result, VoiceModelId}; -use crate::{helpers::CApiResult, OpenJtalkRc, VoicevoxSynthesizer, VoicevoxVoiceModel}; +use crate::{ + helpers::CApiResult, OpenJtalkRc, VoicevoxOnnxruntime, VoicevoxSynthesizer, VoicevoxVoiceModel, +}; // FIXME: 中身(Rust API)を直接操作するかラッパーメソッド越しにするのかが混在していて、一貫性を // 欠いている +impl VoicevoxOnnxruntime { + #[cfg(feature = "load-onnxruntime")] + pub(crate) fn lib_versioned_filename() -> &'static std::ffi::CStr { + to_cstr!(voicevox_core::blocking::Onnxruntime::LIB_VERSIONED_FILENAME) + } + + #[cfg(feature = "load-onnxruntime")] + pub(crate) fn lib_unversioned_filename() -> &'static std::ffi::CStr { + to_cstr!(voicevox_core::blocking::Onnxruntime::LIB_UNVERSIONED_FILENAME) + } + + #[ref_cast_custom] + fn new(rust: &voicevox_core::blocking::Onnxruntime) -> &Self; + + pub(crate) fn get() -> Option<&'static Self> { + voicevox_core::blocking::Onnxruntime::get().map(Self::new) + } + + #[cfg(feature = "load-onnxruntime")] + pub(crate) fn load_once(filename: &std::ffi::CStr) -> CApiResult<&'static Self> { + use crate::helpers::ensure_utf8; + + let inner = voicevox_core::blocking::Onnxruntime::load_once() + 
.filename(ensure_utf8(filename)?) + .exec()?; + Ok(Self::new(inner)) + } + + #[cfg(feature = "link-onnxruntime")] + pub(crate) fn init_once() -> CApiResult<&'static Self> { + let inner = voicevox_core::blocking::Onnxruntime::init_once()?; + Ok(Self::new(inner)) + } +} + +#[cfg(feature = "load-onnxruntime")] +macro_rules! to_cstr { + ($s:expr) => {{ + const __RUST_STR: &str = $s; + static __C_STR: &[u8] = const_format::concatcp!(__RUST_STR, '\0').as_bytes(); + + std::ffi::CStr::from_bytes_with_nul(__C_STR) + .unwrap_or_else(|e| panic!("{__RUST_STR:?} should not contain `\\0`: {e}")) + }}; +} +#[cfg(feature = "load-onnxruntime")] +use to_cstr; + impl OpenJtalkRc { pub(crate) fn new(open_jtalk_dic_dir: impl AsRef) -> Result { Ok(Self { @@ -17,12 +68,23 @@ impl OpenJtalkRc { } impl VoicevoxSynthesizer { - pub(crate) fn new(open_jtalk: &OpenJtalkRc, options: &InitializeOptions) -> Result { - let synthesizer = - voicevox_core::blocking::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?; + pub(crate) fn new( + onnxruntime: &'static VoicevoxOnnxruntime, + open_jtalk: &OpenJtalkRc, + options: &InitializeOptions, + ) -> Result { + let synthesizer = voicevox_core::blocking::Synthesizer::new( + &onnxruntime.0, + open_jtalk.open_jtalk.clone(), + options, + )?; Ok(Self { synthesizer }) } + pub(crate) fn onnxruntime(&self) -> &'static VoicevoxOnnxruntime { + VoicevoxOnnxruntime::new(self.synthesizer.onnxruntime()) + } + pub(crate) fn load_voice_model( &self, model: &voicevox_core::blocking::VoiceModel, diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs b/crates/voicevox_core_c_api/src/compatible_engine.rs index ae4d21a93..bd66210e2 100644 --- a/crates/voicevox_core_c_api/src/compatible_engine.rs +++ b/crates/voicevox_core_c_api/src/compatible_engine.rs @@ -8,11 +8,9 @@ use std::{ use libc::c_int; use once_cell::sync::Lazy; -use voicevox_core::{ - StyleId, SupportedDevices, VoiceModelId, __internal::interop::PerformInference as _, -}; +use 
voicevox_core::{StyleId, VoiceModelId, __internal::interop::PerformInference as _}; -use crate::init_logger_once; +use crate::{helpers::display_error, init_logger_once}; macro_rules! ensure_initialized { ($synthesizer:expr $(,)?) => { @@ -28,6 +26,15 @@ macro_rules! ensure_initialized { static ERROR_MESSAGE: Lazy> = Lazy::new(|| Mutex::new(String::new())); +static ONNXRUNTIME: Lazy<&'static voicevox_core::blocking::Onnxruntime> = Lazy::new(|| { + voicevox_core::blocking::Onnxruntime::load_once() + .exec() + .unwrap_or_else(|err| { + display_error(&err); + panic!("ONNX Runtimeをロードもしくは初期化ができなかったため、クラッシュします"); + }) +}); + struct VoiceModelSet { all_vvms: Vec, all_metas_json: CString, @@ -111,6 +118,7 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod init_logger_once(); let result = (|| { let synthesizer = voicevox_core::blocking::Synthesizer::new( + *ONNXRUNTIME, (), &voicevox_core::InitializeOptions { acceleration_mode: if use_gpu { @@ -196,7 +204,14 @@ pub extern "C" fn supported_devices() -> *const c_char { return SUPPORTED_DEVICES.as_ptr(); static SUPPORTED_DEVICES: Lazy = Lazy::new(|| { - CString::new(SupportedDevices::create().unwrap().to_json().to_string()).unwrap() + CString::new( + ONNXRUNTIME + .supported_devices() + .unwrap() + .to_json() + .to_string(), + ) + .unwrap() }); } diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index ac0cab286..5066b643a 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -1,5 +1,5 @@ use easy_ext::ext; -use std::{error::Error as _, ffi::CStr, fmt::Debug, iter}; +use std::{ffi::CStr, fmt::Debug, iter}; use uuid::Uuid; use voicevox_core::{AudioQueryModel, UserDictWord, VoiceModelId}; @@ -19,14 +19,6 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes } return into_result_code(result); - fn display_error(err: &CApiError) { - itertools::chain( - [err.to_string()], 
- iter::successors(err.source(), |&e| e.source()).map(|e| format!("Caused by: {e}")), - ) - .for_each(|msg| error!("{msg}")); - } - fn into_result_code(result: CApiResult<()>) -> VoicevoxResultCode { use voicevox_core::ErrorKind::*; use CApiError::*; @@ -37,6 +29,7 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes Err(RustApi(err)) => match err.kind() { NotLoadedOpenjtalkDict => VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR, GpuSupport => VOICEVOX_RESULT_GPU_SUPPORT_ERROR, + InitInferenceRuntime => VOICEVOX_RESULT_INIT_INFERENCE_RUNTIME_ERROR, OpenZipFile => VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR, ReadZipEntry => VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR, InvalidModelFormat => VOICEVOX_RESULT_INVALID_MODEL_HEADER_ERROR, @@ -63,6 +56,14 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes } } +pub(crate) fn display_error(err: &impl std::error::Error) { + itertools::chain( + [err.to_string()], + iter::successors(err.source(), |&e| e.source()).map(|e| format!("Caused by: {e}")), + ) + .for_each(|msg| error!("{msg}")); +} + pub(crate) type CApiResult = std::result::Result; #[derive(Error, Debug)] diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index eb2e4698e..964e635ab 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -4,6 +4,7 @@ mod c_impls; /// cbindgen:ignore +#[cfg(feature = "load-onnxruntime")] mod compatible_engine; mod drop_check; mod helpers; @@ -20,6 +21,7 @@ use anstream::{AutoStream, RawStream}; use chrono::SecondsFormat; use colorchoice::ColorChoice; use derive_getters::Getters; +use ref_cast::RefCastCustom; use std::env; use std::ffi::{CStr, CString}; use std::fmt; @@ -32,7 +34,7 @@ use tracing_subscriber::EnvFilter; use uuid::Uuid; use voicevox_core::__internal::interop::IdRef as _; use voicevox_core::{AccentPhraseModel, AudioQueryModel, TtsOptions, UserDictWord}; -use voicevox_core::{StyleId, 
SupportedDevices, SynthesisOptions}; +use voicevox_core::{StyleId, SynthesisOptions}; fn init_logger_once() { static ONCE: Once = Once::new(); @@ -60,13 +62,7 @@ fn init_logger_once() { .with_env_filter(if env::var_os(EnvFilter::DEFAULT_ENV).is_some() { EnvFilter::from_default_env() } else { - pub const ORT_LOGGING_LEVEL: &str = if cfg!(debug_assertions) { - "info" - } else { - "warn" - }; - format!("error,voicevox_core=info,voicevox_core_c_api=info,ort={ORT_LOGGING_LEVEL}") - .into() + "error,voicevox_core=info,voicevox_core_c_api=info,ort=info".into() }) .with_timer(local_time as fn(&mut Writer<'_>) -> _) .with_ansi(ansi) @@ -92,6 +88,171 @@ fn init_logger_once() { * voicevox_core/publish.rsにある対応する関数とはこのファイルに定義してある公開関数からvoicevoxプレフィックスを取り除いた名前の関数である */ +// TODO: https://github.com/mozilla/cbindgen/issues/927 +//#[cfg(feature = "load-onnxruntime")] +//pub const VOICEVOX_ONNXRUNTIME_LIB_NAME: &CStr = ..; +//#[cfg(feature = "load-onnxruntime")] +//pub const VOICEVOX_ONNXRUNTIME_LIB_VERSION: &CStr = ..; + +/// ONNX Runtimeの動的ライブラリの、バージョン付きのファイル名。 +/// +/// WindowsとAndroidでは ::voicevox_get_onnxruntime_lib_unversioned_filename と同じ。 +/// +/// \availability{ +/// [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 +/// } +#[cfg(feature = "load-onnxruntime")] +#[no_mangle] +pub extern "C" fn voicevox_get_onnxruntime_lib_versioned_filename() -> *const c_char { + init_logger_once(); + let filename = VoicevoxOnnxruntime::lib_versioned_filename(); + C_STRING_DROP_CHECKER.blacklist(filename).as_ptr() +} + +/// ONNX Runtimeの動的ライブラリの、バージョン無しのファイル名。 +/// +/// \availability{ +/// [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 +/// } +#[cfg(feature = "load-onnxruntime")] +#[no_mangle] +pub extern "C" fn voicevox_get_onnxruntime_lib_unversioned_filename() -> *const c_char { + init_logger_once(); + let filename = 
VoicevoxOnnxruntime::lib_unversioned_filename(); + C_STRING_DROP_CHECKER.blacklist(filename).as_ptr() +} + +/// ::voicevox_onnxruntime_load_once のオプション。 +/// +/// \availability{ +/// [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 +/// } +#[cfg(feature = "load-onnxruntime")] +#[repr(C)] +pub struct VoicevoxLoadOnnxruntimeOptions { + /// ONNX Runtimeのファイル名(モジュール名)もしくはファイルパスを指定する。 + /// + /// `dlopen`/[`LoadLibraryExW`](https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexw)の引数に使われる。デフォルトは ::voicevox_get_onnxruntime_lib_versioned_filename と同じ。 + filename: *const c_char, +} + +/// デフォルトの ::voicevox_onnxruntime_load_once のオプションを生成する。 +/// +/// @return デフォルトの ::voicevox_onnxruntime_load_once のオプション +/// +/// \availability{ +/// [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 +/// } +#[cfg(feature = "load-onnxruntime")] +#[no_mangle] +pub extern "C" fn voicevox_make_default_load_onnxruntime_options() -> VoicevoxLoadOnnxruntimeOptions +{ + init_logger_once(); + let filename = VoicevoxOnnxruntime::lib_versioned_filename(); + let filename = C_STRING_DROP_CHECKER.blacklist(filename).as_ptr(); + VoicevoxLoadOnnxruntimeOptions { filename } +} + +// https://github.com/mozilla/cbindgen/issues/967 +// FIXME: このコードブロックのコードが動くかどうか未確認 +/// ONNX Runtime。 +/// +/// シングルトンであり、インスタンスは高々一つ。 +/// +/// ```c +/// const VoicevoxOnnxruntime *ort1; +/// voicevox_onnxruntime_load_once(voicevox_make_default_load_onnxruntime_options, +/// &ort1); +/// const VoicevoxOnnxruntime *ort2 = voicevox_onnxruntime_get(); +/// assert(ort1 == ort2); +/// ``` +#[cfg(any())] +pub struct VoicevoxOnnxruntime(!); + +/// cbindgen:ignore +#[derive(RefCastCustom)] +#[repr(transparent)] +pub struct VoicevoxOnnxruntime(voicevox_core::blocking::Onnxruntime); + +/// ::VoicevoxOnnxruntime のインスタンスが既に作られているならそれを得る。 +/// +/// 
作られていなければ`NULL`を返す。 +/// +/// @returns ::VoicevoxOnnxruntime のインスタンス +#[no_mangle] +pub extern "C" fn voicevox_onnxruntime_get() -> Option<&'static VoicevoxOnnxruntime> { + VoicevoxOnnxruntime::get() +} + +/// ONNX Runtimeをロードして初期化する。 +/// +/// 一度成功したら、以後は引数を無視して同じ参照を返す。 +/// +/// @param [in] options オプション +/// @param [out] out_onnxruntime ::VoicevoxOnnxruntime のインスタンス +/// +/// @returns 結果コード +/// +/// \availability{ +/// [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSを除くプラットフォームで利用可能。詳細はファイルレベルの"Availability"の節を参照。 +/// } +/// +/// \safety{ +/// - `options.filename`はヌル終端文字列を指し、かつ読み込みについて有効でなければならない。 +/// - `out_onnxruntime`は書き込みについて有効でなければならない。 +/// } +#[cfg(feature = "load-onnxruntime")] +#[no_mangle] +pub unsafe extern "C" fn voicevox_onnxruntime_load_once( + options: VoicevoxLoadOnnxruntimeOptions, + out_onnxruntime: NonNull<&'static VoicevoxOnnxruntime>, +) -> VoicevoxResultCode { + init_logger_once(); + let filename = unsafe { + // SAFETY: ユーザーに要求している条件で十分 + CStr::from_ptr(options.filename) + }; + into_result_code_with_error((|| { + let instance = VoicevoxOnnxruntime::load_once(filename)?; + unsafe { + // SAFETY: ユーザーに要求している条件で十分 + out_onnxruntime.as_ptr().write_unaligned(instance); + } + Ok(()) + })()) +} + +/// ONNX Runtimeを初期化する。 +/// +/// 一度成功したら以後は同じ参照を返す。 +/// +/// @param [out] out_onnxruntime ::VoicevoxOnnxruntime のインスタンス +/// +/// @returns 結果コード +/// +/// \availability{ +/// [リリース](https://github.com/voicevox/voicevox_core/releases)されているライブラリではiOSでのみ利用可能。詳細はファイルレベルの"Availability"の節を参照。 +/// } +/// +/// \safety{ +/// - `out_onnxruntime`は書き込みについて有効でなければならない。 +/// } +#[cfg(feature = "link-onnxruntime")] +#[no_mangle] +pub unsafe extern "C" fn voicevox_onnxruntime_init_once( + out_onnxruntime: NonNull<&'static VoicevoxOnnxruntime>, +) -> VoicevoxResultCode { + init_logger_once(); + into_result_code_with_error((|| { + let instance = VoicevoxOnnxruntime::init_once()?; + unsafe { + // SAFETY: ユーザーに要求している条件で十分 + 
out_onnxruntime.as_ptr().write_unaligned(instance); + } + Ok(()) + })()) +} + /// テキスト解析器としてのOpen JTalk。 /// /// 構築(_construction_)は ::voicevox_open_jtalk_rc_new で行い、破棄(_destruction_)は ::voicevox_open_jtalk_rc_delete で行う。 @@ -226,6 +387,7 @@ pub extern "C" fn voicevox_get_version() -> *const c_char { init_logger_once(); return C_STRING_DROP_CHECKER.blacklist(VERSION).as_ptr(); + // FIXME: 実行時チェックにすることでこの`unsafe`は削れるはず const VERSION: &CStr = unsafe { // SAFETY: The package version is a SemVer, so it should not contain '\0' CStr::from_bytes_with_nul_unchecked(concat!(env!("CARGO_PKG_VERSION"), '\0').as_bytes()) @@ -330,6 +492,7 @@ pub struct VoicevoxSynthesizer { /// ::VoicevoxSynthesizer を構築(_construct_)する。 /// +/// @param [in] onnxruntime /// @param [in] open_jtalk Open JTalkのオブジェクト /// @param [in] options オプション /// @param [out] out_synthesizer 構築先 @@ -337,11 +500,13 @@ pub struct VoicevoxSynthesizer { /// @returns 結果コード /// /// \safety{ +/// - `onnxruntime`は ::voicevox_onnxruntime_load_once または ::voicevox_onnxruntime_init_once で得たものでなければならない。 /// - `open_jtalk`は ::voicevox_voice_model_new_from_path で得たものでなければならず、また ::voicevox_open_jtalk_rc_new で解放されていてはいけない。 /// - `out_synthesizer`は書き込みについて有効でなければならない。 /// } #[no_mangle] pub unsafe extern "C" fn voicevox_synthesizer_new( + onnxruntime: &'static VoicevoxOnnxruntime, open_jtalk: &OpenJtalkRc, options: VoicevoxInitializeOptions, out_synthesizer: NonNull>, @@ -350,7 +515,7 @@ pub unsafe extern "C" fn voicevox_synthesizer_new( into_result_code_with_error((|| { let options = options.into(); - let synthesizer = VoicevoxSynthesizer::new(open_jtalk, &options)?.into(); + let synthesizer = VoicevoxSynthesizer::new(onnxruntime, open_jtalk, &options)?.into(); out_synthesizer.as_ptr().write_unaligned(synthesizer); Ok(()) })()) @@ -411,6 +576,22 @@ pub extern "C" fn voicevox_synthesizer_unload_voice_model( into_result_code_with_error(synthesizer.unload_voice_model(model_id).map_err(Into::into)) } +/// ::VoicevoxOnnxruntime 
のインスタンスを得る。 +/// +/// @param [in] synthesizer 音声シンセサイザ +/// +/// @returns ::VoicevoxOnnxruntime のインスタンス +/// +/// \safety{ +/// - `synthesizer`は ::voicevox_synthesizer_new で得たものでなければならず、また ::voicevox_synthesizer_delete で解放されていてはいけない。 +/// } +#[no_mangle] +pub extern "C" fn voicevox_synthesizer_get_onnxruntime( + synthesizer: &VoicevoxSynthesizer, +) -> &'static VoicevoxOnnxruntime { + synthesizer.onnxruntime() +} + /// ハードウェアアクセラレーションがGPUモードか判定する。 /// /// @param [in] synthesizer 音声シンセサイザ @@ -473,6 +654,7 @@ pub extern "C" fn voicevox_synthesizer_create_metas_json( /// /// あくまで本ライブラリが対応しているデバイスの情報であることに注意。GPUが使える環境ではなかったとしても`cuda`や`dml`は`true`を示しうる。 /// +/// @param [in] onnxruntime /// @param [out] output_supported_devices_json サポートデバイス情報のJSON文字列 /// /// @returns 結果コード @@ -480,21 +662,23 @@ pub extern "C" fn voicevox_synthesizer_create_metas_json( /// \example{ /// ```c /// char *supported_devices; -/// VoicevoxResultCode result = voicevox_create_supported_devices_json(&supported_devices); +/// VoicevoxResultCode result = voicevox_onnxruntime_create_supported_devices_json(onnxruntime, &supported_devices); /// ``` /// } /// /// \safety{ +/// - `onnxruntime`は ::voicevox_onnxruntime_load_once または ::voicevox_onnxruntime_init_once で得たものでなければならない。 /// - `output_supported_devices_json`は書き込みについて有効でなければならない。 /// } #[no_mangle] -pub unsafe extern "C" fn voicevox_create_supported_devices_json( +pub unsafe extern "C" fn voicevox_onnxruntime_create_supported_devices_json( + onnxruntime: &'static VoicevoxOnnxruntime, output_supported_devices_json: NonNull<*mut c_char>, ) -> VoicevoxResultCode { init_logger_once(); into_result_code_with_error((|| { let supported_devices = - CString::new(SupportedDevices::create()?.to_json().to_string()).unwrap(); + CString::new(onnxruntime.0.supported_devices()?.to_json().to_string()).unwrap(); output_supported_devices_json.as_ptr().write_unaligned( C_STRING_DROP_CHECKER .whitelist(supported_devices) @@ -980,7 +1164,7 @@ pub unsafe extern "C" fn 
voicevox_synthesizer_tts( /// /// \safety{ /// - `json`は以下のAPIで得られたポインタでなくてはいけない。 -/// - ::voicevox_create_supported_devices_json +/// - ::voicevox_onnxruntime_create_supported_devices_json /// - ::voicevox_synthesizer_create_metas_json /// - ::voicevox_synthesizer_create_audio_query /// - ::voicevox_synthesizer_create_accent_phrases diff --git a/crates/voicevox_core_c_api/src/result_code.rs b/crates/voicevox_core_c_api/src/result_code.rs index 0897dfa87..085f2b8c6 100644 --- a/crates/voicevox_core_c_api/src/result_code.rs +++ b/crates/voicevox_core_c_api/src/result_code.rs @@ -17,6 +17,8 @@ pub enum VoicevoxResultCode { VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR = 3, /// GPUモードがサポートされていない VOICEVOX_RESULT_GPU_SUPPORT_ERROR = 4, + /// 推論ライブラリのロードまたは初期化ができなかった + VOICEVOX_RESULT_INIT_INFERENCE_RUNTIME_ERROR = 29, /// スタイルIDに対するスタイルが見つからなかった VOICEVOX_RESULT_STYLE_NOT_FOUND_ERROR = 6, /// 音声モデルIDに対する音声モデルが見つからなかった @@ -69,6 +71,9 @@ pub(crate) const fn error_result_to_message(result_code: VoicevoxResultCode) -> VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR => { cstr!("サポートされているデバイス情報取得中にエラーが発生しました") } + VOICEVOX_RESULT_INIT_INFERENCE_RUNTIME_ERROR => { + cstr!("推論ライブラリのロードまたは初期化ができませんでした") + } VOICEVOX_RESULT_OK => cstr!("エラーが発生しませんでした"), VOICEVOX_RESULT_STYLE_NOT_FOUND_ERROR => cstr!( "指定されたIDに対するスタイルが見つかりませんでした。音声モデルが読み込まれていないか\ diff --git a/crates/voicevox_core_c_api/tests/e2e/assert_cdylib.rs b/crates/voicevox_core_c_api/tests/e2e/assert_cdylib.rs index cfbec5c31..e779fdc23 100644 --- a/crates/voicevox_core_c_api/tests/e2e/assert_cdylib.rs +++ b/crates/voicevox_core_c_api/tests/e2e/assert_cdylib.rs @@ -8,6 +8,7 @@ use assert_cmd::assert::{Assert, AssertResult, OutputAssertExt as _}; use clap::Parser as _; use duct::cmd; use easy_ext::ext; +use itertools::Itertools as _; use libloading::Library; use libtest_mimic::{Failed, Trial}; @@ -46,7 +47,15 @@ pub(crate) fn exec() -> anyhow::Result<()> { // テスト対象が無いときに`cargo build`をスキップしたいが、判定部分がプライベート。 // 
そのためスキップするのはCLIオプションに`--ignored`か`--include-ignored`が無いときのみ if args.ignored || args.include_ignored { - cmd!(env!("CARGO"), "build", "--release", "--lib").run()?; + cmd!( + env!("CARGO"), + "build", + "--release", + "--lib", + "--features", + &format!(",{}", C::FEATURES.iter().format(",")), + ) + .run()?; ensure!( C::cdylib_path().exists(), @@ -96,6 +105,7 @@ pub(crate) fn exec() -> anyhow::Result<()> { } pub(crate) trait TestContext { + const FEATURES: &'static [&'static str]; const TARGET_DIR: &'static str; const CDYLIB_NAME: &'static str; const RUNTIME_ENVS: &'static [(&'static str, &'static str)]; diff --git a/crates/voicevox_core_c_api/tests/e2e/log_mask.rs b/crates/voicevox_core_c_api/tests/e2e/log_mask.rs index b28442bdb..9b08c9af7 100644 --- a/crates/voicevox_core_c_api/tests/e2e/log_mask.rs +++ b/crates/voicevox_core_c_api/tests/e2e/log_mask.rs @@ -20,6 +20,13 @@ impl Utf8Output { ) } + pub(crate) fn mask_onnxruntime_version(self) -> Self { + self.mask_stderr( + static_regex!(regex::escape(ort::downloaded_version!())), + "{onnxruntime_version}", + ) + } + pub(crate) fn mask_windows_video_cards(self) -> Self { self.mask_stderr( static_regex!( diff --git a/crates/voicevox_core_c_api/tests/e2e/main.rs b/crates/voicevox_core_c_api/tests/e2e/main.rs index 43dc3a95e..0a7520c76 100644 --- a/crates/voicevox_core_c_api/tests/e2e/main.rs +++ b/crates/voicevox_core_c_api/tests/e2e/main.rs @@ -22,6 +22,7 @@ fn main() -> anyhow::Result<()> { enum TestContext {} impl assert_cdylib::TestContext for TestContext { + const FEATURES: &'static [&'static str] = &["load-onnxruntime"]; const TARGET_DIR: &'static str = "../../target"; const CDYLIB_NAME: &'static str = "voicevox_core"; const RUNTIME_ENVS: &'static [(&'static str, &'static str)] = diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml index 151074cb3..b623119dd 100644 --- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml +++ 
b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml @@ -51,9 +51,12 @@ metas = ''' } ]''' stderr.windows = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} ''' -stderr.unix = "" +stderr.unix = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +''' [compatible_engine_load_model_before_initialize] last_error_message = "Statusが初期化されていません" @@ -83,14 +86,21 @@ result_messages.22 = "ユーザー辞書に単語が見つかりませんでし result_messages.23 = "OpenJTalkのユーザー辞書の設定に失敗しました" result_messages.24 = "ユーザー辞書の単語のバリデーションに失敗しました" result_messages.25 = "UUIDの変換に失敗しました" -stderr = "" +# FIXME: 26, 27, 28が抜けている +result_messages.29 = "推論ライブラリのロードまたは初期化ができませんでした" +stderr = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +''' [simple_tts] output."こんにちは、音声合成の世界へようこそ".wav_length = 176172 stderr.windows = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} ''' -stderr.unix = "" +stderr.unix = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +''' [synthesizer_new_output_json] metas = ''' @@ -145,22 +155,32 @@ metas = ''' } ]''' stderr.windows = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} ''' -stderr.unix = "" +stderr.unix = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +''' [tts_via_audio_query] output."こんにちは、音声合成の世界へようこそ".wav_length = 176172 stderr.windows = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' {windows-video-cards} ''' -stderr.unix = "" +stderr.unix = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +''' +# FIXME: "user_dict_load"のはず [user_dict] stderr.windows = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' 
{windows-video-cards} ''' -stderr.unix = "" +stderr.unix = ''' +{timestamp} INFO ort: Loaded ONNX Runtime dylib with version '{onnxruntime_version}' +''' [user_dict_manipulate] stderr = "" diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs index c2fc211c2..79b1d5dc8 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine.rs @@ -31,12 +31,10 @@ impl assert_cdylib::TestCase for TestCase { serde_json::to_string_pretty(&metas_json.parse::()?).unwrap() }; - let supported_devices = { + { let supported_devices = lib.supported_devices(); - CStr::from_ptr(supported_devices) - .to_str()? - .parse::()? - }; + serde_json::from_str::(CStr::from_ptr(supported_devices).to_str()?)?; + } assert!(lib.initialize(false, 0, false)); @@ -86,10 +84,6 @@ impl assert_cdylib::TestCase for TestCase { }; std::assert_eq!(SNAPSHOTS.metas, metas_json); - std::assert_eq!( - SupportedDevices::create().unwrap().to_json(), - supported_devices, - ); float_assert::close_l1(&phoneme_length, &EXAMPLE_DATA.duration.result, 0.01); float_assert::close_l1(&intonation_list, &EXAMPLE_DATA.intonation.result, 0.01); @@ -103,6 +97,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs index 173e32f8c..18482f0de 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/compatible_engine_load_model_before_initialize.rs @@ -34,6 +34,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs index c6ea390ed..d2a8fc7e7 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/global_info.rs @@ -28,16 +28,23 @@ impl assert_cdylib::TestCase for TestCase { CStr::from_ptr(lib.voicevox_get_version()).to_str()?, ); + let onnxruntime = { + let mut onnxruntime = MaybeUninit::uninit(); + assert_ok(lib.voicevox_onnxruntime_load_once( + lib.voicevox_make_default_load_onnxruntime_options(), + onnxruntime.as_mut_ptr(), + )); + onnxruntime.assume_init() + }; + { let mut supported_devices = MaybeUninit::uninit(); - assert_ok(lib.voicevox_create_supported_devices_json(supported_devices.as_mut_ptr())); + assert_ok(lib.voicevox_onnxruntime_create_supported_devices_json( + onnxruntime, + supported_devices.as_mut_ptr(), + )); let supported_devices = supported_devices.assume_init(); - std::assert_eq!( - SupportedDevices::create()?.to_json(), - CStr::from_ptr(supported_devices) - .to_str()? 
- .parse::()?, - ); + serde_json::from_str::(CStr::from_ptr(supported_devices).to_str()?)?; lib.voicevox_json_free(supported_devices); } @@ -83,6 +90,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs index 4ac4030e1..4635b0271 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/simple_tts.rs @@ -37,6 +37,15 @@ impl assert_cdylib::TestCase for TestCase { model.assume_init() }; + let onnxruntime = { + let mut onnxruntime = MaybeUninit::uninit(); + assert_ok(lib.voicevox_onnxruntime_load_once( + lib.voicevox_make_default_load_onnxruntime_options(), + onnxruntime.as_mut_ptr(), + )); + onnxruntime.assume_init() + }; + let openjtalk = { let mut openjtalk = MaybeUninit::uninit(); let open_jtalk_dic_dir = CString::new(OPEN_JTALK_DIC_DIR).unwrap(); @@ -49,6 +58,7 @@ impl assert_cdylib::TestCase for TestCase { let synthesizer = { let mut synthesizer = MaybeUninit::uninit(); assert_ok(lib.voicevox_synthesizer_new( + onnxruntime, openjtalk, VoicevoxInitializeOptions { acceleration_mode: @@ -96,6 +106,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs index c27bd4703..8a2fd1951 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_output_json.rs @@ -28,6 +28,15 @@ impl assert_cdylib::TestCase for TestCase { unsafe fn exec(&self, lib: Library) -> anyhow::Result<()> { let lib = CApi::from_library(lib)?; + let onnxruntime = { + let mut onnxruntime = MaybeUninit::uninit(); + assert_ok(lib.voicevox_onnxruntime_load_once( + lib.voicevox_make_default_load_onnxruntime_options(), + onnxruntime.as_mut_ptr(), + )); + onnxruntime.assume_init() + }; + let openjtalk = { let mut openjtalk = MaybeUninit::uninit(); let open_jtalk_dic_dir = CString::new(OPEN_JTALK_DIC_DIR).unwrap(); @@ -40,6 +49,7 @@ impl assert_cdylib::TestCase for TestCase { let synthesizer = { let mut synthesizer = MaybeUninit::uninit(); assert_ok(lib.voicevox_synthesizer_new( + onnxruntime, openjtalk, VoicevoxInitializeOptions { acceleration_mode: @@ -85,6 +95,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs index d380b71b2..728f21339 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/tts_via_audio_query.rs @@ -37,6 +37,15 @@ impl assert_cdylib::TestCase for TestCase { model.assume_init() }; + let onnxruntime = { + let mut onnxruntime = MaybeUninit::uninit(); + assert_ok(lib.voicevox_onnxruntime_load_once( + lib.voicevox_make_default_load_onnxruntime_options(), + onnxruntime.as_mut_ptr(), + )); + onnxruntime.assume_init() + }; + let openjtalk = { let mut openjtalk = MaybeUninit::uninit(); let open_jtalk_dic_dir = CString::new(OPEN_JTALK_DIC_DIR).unwrap(); @@ -49,6 +58,7 @@ impl assert_cdylib::TestCase for TestCase { let synthesizer = { let mut synthesizer = MaybeUninit::uninit(); assert_ok(lib.voicevox_synthesizer_new( + onnxruntime, openjtalk, VoicevoxInitializeOptions { acceleration_mode: @@ -108,6 +118,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs index 2e6875e97..646c5647b 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_load.rs @@ -54,6 +54,15 @@ impl assert_cdylib::TestCase for TestCase { model.assume_init() }; + let onnxruntime = { + let mut onnxruntime = MaybeUninit::uninit(); + assert_ok(lib.voicevox_onnxruntime_load_once( + lib.voicevox_make_default_load_onnxruntime_options(), + onnxruntime.as_mut_ptr(), + )); + onnxruntime.assume_init() + }; + let openjtalk = { let mut openjtalk = MaybeUninit::uninit(); let open_jtalk_dic_dir = CString::new(OPEN_JTALK_DIC_DIR).unwrap(); @@ -66,6 +75,7 @@ impl assert_cdylib::TestCase for TestCase { let synthesizer = { let mut synthesizer = MaybeUninit::uninit(); assert_ok(lib.voicevox_synthesizer_new( + onnxruntime, openjtalk, VoicevoxInitializeOptions { acceleration_mode: @@ -125,6 +135,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs index fd3d575e3..bec0d61b5 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/user_dict_manipulate.rs @@ -140,6 +140,7 @@ impl assert_cdylib::TestCase for TestCase { fn assert_output(&self, output: Utf8Output) -> AssertResult { output .mask_timestamps() + .mask_onnxruntime_version() .mask_windows_video_cards() .assert() .try_success()? 
diff --git a/crates/voicevox_core_java_api/Cargo.toml b/crates/voicevox_core_java_api/Cargo.toml index b39a98b2a..e9cced8b2 100644 --- a/crates/voicevox_core_java_api/Cargo.toml +++ b/crates/voicevox_core_java_api/Cargo.toml @@ -15,6 +15,7 @@ directml = ["voicevox_core/directml"] android_logger.workspace = true chrono = { workspace = true, default-features = false, features = ["clock"] } derive_more.workspace = true +duplicate.workspace = true easy-ext.workspace = true jni.workspace = true once_cell.workspace = true @@ -24,7 +25,7 @@ serde_json = { workspace = true, features = ["preserve_order"] } tracing = { workspace = true, features = ["log"] } tracing-subscriber = { workspace = true, features = ["env-filter"] } uuid.workspace = true -voicevox_core.workspace = true +voicevox_core = { workspace = true, features = ["load-onnxruntime"] } [lints.rust] unsafe_code = "allow" # jni-rsが要求 diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/GlobalInfo.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/GlobalInfo.java index 26f9ccddd..010e69073 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/GlobalInfo.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/GlobalInfo.java @@ -1,6 +1,5 @@ package jp.hiroshiba.voicevoxcore; -import com.google.gson.Gson; import com.google.gson.annotations.Expose; import com.google.gson.annotations.SerializedName; import jakarta.annotation.Nonnull; @@ -17,28 +16,13 @@ public static String getVersion() { return rsGetVersion(); } - /** - * このライブラリで利用可能なデバイスの情報を取得する。 - * - * @return {@link SupportedDevices}。 - */ - @Nonnull - public static SupportedDevices getSupportedDevices() { - Gson gson = new Gson(); - String supportedDevicesJson = rsGetSupportedDevicesJson(); - SupportedDevices supportedDevices = gson.fromJson(supportedDevicesJson, SupportedDevices.class); - if (supportedDevices == null) { - throw new 
NullPointerException("supported_devices"); - } - return supportedDevices; - } - @Nonnull private static native String rsGetVersion(); @Nonnull private static native String rsGetSupportedDevicesJson(); + // FIXME: `Onnxruntime`に移すか、独立させる /** * このライブラリで利用可能なデバイスの情報。 * diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java new file mode 100644 index 000000000..9a9cbe133 --- /dev/null +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Onnxruntime.java @@ -0,0 +1,132 @@ +package jp.hiroshiba.voicevoxcore; + +import static jp.hiroshiba.voicevoxcore.GlobalInfo.SupportedDevices; + +import com.google.gson.Gson; +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.util.Optional; + +/** + * ONNX Runtime。 + * + *

シングルトンであり、インスタンスは高々一つ。 + * + *

+ * Onnxruntime ort1 = Onnxruntime.loadOnce().exec();
+ * Onnxruntime ort2 = Onnxruntime.get().get();
+ * assert ort1 == ort2;
+ * 
+ */ +public class Onnxruntime extends Dll { + /** ONNX Runtimeのライブラリ名。 */ + public static final String LIB_NAME = "onnxruntime"; + + /** 推奨されるONNX Runtimeのバージョン。 */ + public static final String LIB_VERSION = "1.17.3"; + + /** + * {@link LIB_NAME}と{@link LIB_VERSION}からなる動的ライブラリのファイル名。 + * + *

WindowsとAndroidでは{@link LIB_UNVERSIONED_FILENAME}と同じ。 + */ + public static final String LIB_VERSIONED_FILENAME = rsLibVersionedFilename(); + + /** {@link LIB_NAME}からなる動的ライブラリのファイル名。 */ + public static final String LIB_UNVERSIONED_FILENAME = rsLibUnversionedFilename(); + + @Nullable private static Onnxruntime instance = null; + + /** + * インスタンスが既に作られているならそれを得る。 + * + * @return インスタンスがあるなら{@code Optional.of(…)}、そうでなければ{@code Optional.empty()}。 + */ + public static Optional get() { + synchronized (Onnxruntime.class) { + return Optional.ofNullable(instance); + } + } + + /** + * ONNX Runtimeをロードして初期化する。 + * + *

一度成功したら、以後は引数を無視して同じインスタンスを返す。 + * + * @return {@link LoadOnce}。 + */ + public static LoadOnce loadOnce() { + return new LoadOnce(); + } + + private static native String rsLibName(); + + private static native String rsLibVersion(); + + private static native String rsLibVersionedFilename(); + + private static native String rsLibUnversionedFilename(); + + static { + assert LIB_NAME.equals(rsLibName()) && LIB_VERSION.equals(rsLibVersion()); + } + + /** {@link #loadOnce}のビルダー。 */ + public static class LoadOnce { + /** + * ONNX Runtimeのファイル名(モジュール名)もしくはファイルパスを指定する。 + * + * @param filename {@code dlopen}/{@code + * LoadLibraryExW}の引数に使われる。デフォルトは{@link LIB_VERSIONED_FILENAME}。 + * @return このオブジェクト。 + */ + public LoadOnce filename(@Nonnull String filename) { + this.filename = filename; + return this; + } + + /** + * 実行する。 + * + * @return {@link Onnxruntime}。 + */ + public Onnxruntime exec() { + synchronized (Onnxruntime.class) { + if (instance == null) { + instance = new Onnxruntime(filename); + } + } + return instance; + } + + private LoadOnce() {} + + @Nonnull private String filename = LIB_VERSIONED_FILENAME; + } + + private long handle; + + private Onnxruntime(@Nullable String filename) { + rsNew(filename); + } + + /** + * このライブラリで利用可能なデバイスの情報を取得する。 + * + * @return {@link SupportedDevices}。 + */ + public SupportedDevices supportedDevices() { + Gson gson = new Gson(); + String supportedDevicesJson = rsSupportedDevices(); + SupportedDevices supportedDevices = gson.fromJson(supportedDevicesJson, SupportedDevices.class); + if (supportedDevices == null) { + throw new NullPointerException("supported_devices"); + } + return supportedDevices; + } + + private native void rsNew(@Nullable String filename); + + private native String rsSupportedDevices(); +} diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java index 6ec6d9108..8d5694267 
100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/Synthesizer.java @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.UUID; import jp.hiroshiba.voicevoxcore.exceptions.InferenceFailedException; import jp.hiroshiba.voicevoxcore.exceptions.InvalidModelDataException; @@ -17,8 +18,8 @@ public class Synthesizer extends Dll { private long handle; - private Synthesizer(OpenJtalk openJtalk, Builder builder) { - rsNew(openJtalk, builder); + private Synthesizer(Onnxruntime onnxruntime, OpenJtalk openJtalk, Builder builder) { + rsNew(onnxruntime, openJtalk, builder); } protected void finalize() throws Throwable { @@ -26,6 +27,18 @@ protected void finalize() throws Throwable { super.finalize(); } + /** + * ONNX Runtime。 + * + * @return {@link Onnxruntime}。 + */ + @Nonnull + public Onnxruntime getOnnxruntime() { + Optional onnxruntime = Onnxruntime.get(); + assert onnxruntime.isPresent() : "`Synthesizer`のコンストラクタで要求しているはず"; + return onnxruntime.get(); + } + /** * ハードウェアアクセラレーションがGPUモードかどうかを返す。 * @@ -266,7 +279,7 @@ public TtsConfigurator tts(String text, int styleId) { return new TtsConfigurator(this, text, styleId); } - private native void rsNew(OpenJtalk openJtalk, Builder builder); + private native void rsNew(Onnxruntime onnxruntime, OpenJtalk openJtalk, Builder builder); private native boolean rsIsGpuMode(); @@ -320,8 +333,8 @@ private native byte[] rsTts(String text, int styleId, boolean enableInterrogativ private native void rsDrop(); - public static Builder builder(OpenJtalk openJtalk) { - return new Builder(openJtalk); + public static Builder builder(Onnxruntime onnxruntime, OpenJtalk openJtalk) { + return new Builder(onnxruntime, openJtalk); } /** @@ -330,6 +343,7 @@ public static Builder builder(OpenJtalk openJtalk) { * @see Synthesizer#builder */ 
public static class Builder { + private Onnxruntime onnxruntime; private OpenJtalk openJtalk; @SuppressWarnings("unused") @@ -338,7 +352,8 @@ public static class Builder { @SuppressWarnings("unused") private int cpuNumThreads; - public Builder(OpenJtalk openJtalk) { + public Builder(Onnxruntime onnxruntime, OpenJtalk openJtalk) { + this.onnxruntime = onnxruntime; this.openJtalk = openJtalk; } @@ -373,7 +388,7 @@ public Builder cpuNumThreads(int cpuNumThreads) { * @return {@link Synthesizer}。 */ public Synthesizer build() { - Synthesizer synthesizer = new Synthesizer(openJtalk, this); + Synthesizer synthesizer = new Synthesizer(onnxruntime, openJtalk, this); return synthesizer; } } diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/exceptions/InitInferenceRuntimeException.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/exceptions/InitInferenceRuntimeException.java new file mode 100644 index 000000000..c981ea034 --- /dev/null +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/exceptions/InitInferenceRuntimeException.java @@ -0,0 +1,14 @@ +package jp.hiroshiba.voicevoxcore.exceptions; + +import java.io.IOException; + +/** 推論ライブラリのロードまたは初期化ができなかった。 */ +public class InitInferenceRuntimeException extends IOException { + public InitInferenceRuntimeException(String message) { + super(message); + } + + public InitInferenceRuntimeException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/InfoTest.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/InfoTest.java index 52915abad..c9e71ed1c 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/InfoTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/InfoTest.java @@ -8,15 +8,16 @@ import org.junit.jupiter.api.Test; -class InfoTest 
{ +class InfoTest extends TestUtils { @Test void checkVersion() { assertNotNull(GlobalInfo.getVersion()); } + // TODO: 別の場所に移す @Test void checkSupportedDevices() { - GlobalInfo.SupportedDevices supportedDevices = GlobalInfo.getSupportedDevices(); + GlobalInfo.SupportedDevices supportedDevices = loadOnnxruntime().supportedDevices(); assertNotNull(supportedDevices); assertTrue(supportedDevices.cpu); diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java index 1eb8fe057..4de2f617b 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java @@ -20,9 +20,12 @@ interface MoraCheckCallback { @Test void checkIsGpuMode() { + Onnxruntime onnxruntime = loadOnnxruntime(); OpenJtalk openJtalk = loadOpenJtalk(); Synthesizer synthesizer = - Synthesizer.builder(openJtalk).accelerationMode(Synthesizer.AccelerationMode.CPU).build(); + Synthesizer.builder(onnxruntime, openJtalk) + .accelerationMode(Synthesizer.AccelerationMode.CPU) + .build(); assertFalse(synthesizer.isGpuMode()); } @@ -45,9 +48,10 @@ boolean checkAllMoras( @Test void checkModel() throws InvalidModelDataException { + Onnxruntime onnxruntime = loadOnnxruntime(); VoiceModel model = loadModel(); OpenJtalk openJtalk = loadOpenJtalk(); - Synthesizer synthesizer = Synthesizer.builder(openJtalk).build(); + Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); assertTrue(synthesizer.metas().length == 0); @@ -65,8 +69,9 @@ void checkModel() throws InvalidModelDataException { @Test void checkAudioQuery() throws InferenceFailedException, InvalidModelDataException { VoiceModel model = loadModel(); + Onnxruntime onnxruntime = loadOnnxruntime(); OpenJtalk openJtalk = loadOpenJtalk(); - Synthesizer synthesizer = 
Synthesizer.builder(openJtalk).build(); + Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); synthesizer.loadVoiceModel(model); AudioQuery query = synthesizer.createAudioQuery("こんにちは", model.metas[0].styles[0].id); @@ -77,7 +82,8 @@ void checkAudioQuery() throws InferenceFailedException, InvalidModelDataExceptio void checkAccentPhrases() throws InferenceFailedException, InvalidModelDataException { VoiceModel model = loadModel(); OpenJtalk openJtalk = loadOpenJtalk(); - Synthesizer synthesizer = Synthesizer.builder(openJtalk).build(); + Onnxruntime onnxruntime = loadOnnxruntime(); + Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); synthesizer.loadVoiceModel(model); List accentPhrases = synthesizer.createAccentPhrases("こんにちは", model.metas[0].styles[0].id); @@ -106,8 +112,9 @@ void checkAccentPhrases() throws InferenceFailedException, InvalidModelDataExcep @Test void checkTts() throws InferenceFailedException, InvalidModelDataException { VoiceModel model = loadModel(); + Onnxruntime onnxruntime = loadOnnxruntime(); OpenJtalk openJtalk = loadOpenJtalk(); - Synthesizer synthesizer = Synthesizer.builder(openJtalk).build(); + Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); synthesizer.loadVoiceModel(model); synthesizer.tts("こんにちは", model.metas[0].styles[0].id); } diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java index 032c38a3d..9ab731cd9 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/TestUtils.java @@ -15,6 +15,16 @@ VoiceModel loadModel() { } } + Onnxruntime loadOnnxruntime() { + final String FILENAME = "../../test_util/data/lib/" + Onnxruntime.LIB_VERSIONED_FILENAME; + + try { + return 
Onnxruntime.loadOnce().filename(FILENAME).exec(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + OpenJtalk loadOpenJtalk() { String cwd = System.getProperty("user.dir"); File path = new File(cwd + "/../../test_util/data/open_jtalk_dic_utf_8-1.11"); diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java index ce9b7631a..5300b81f3 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/UserDictTest.java @@ -17,8 +17,9 @@ class UserDictTest extends TestUtils { void checkLoad() throws InferenceFailedException, InvalidModelDataException, LoadUserDictException { VoiceModel model = loadModel(); + Onnxruntime onnxruntime = loadOnnxruntime(); OpenJtalk openJtalk = loadOpenJtalk(); - Synthesizer synthesizer = Synthesizer.builder(openJtalk).build(); + Synthesizer synthesizer = Synthesizer.builder(onnxruntime, openJtalk).build(); UserDict userDict = new UserDict(); synthesizer.loadVoiceModel(model); AudioQuery query1 = diff --git a/crates/voicevox_core_java_api/src/common.rs b/crates/voicevox_core_java_api/src/common.rs index 6e13cee89..c55ce8106 100644 --- a/crates/voicevox_core_java_api/src/common.rs +++ b/crates/voicevox_core_java_api/src/common.rs @@ -72,6 +72,7 @@ where let class = class!( NotLoadedOpenjtalkDict, GpuSupport, + InitInferenceRuntime, OpenZipFile, ReadZipEntry, InvalidModelFormat, diff --git a/crates/voicevox_core_java_api/src/info.rs b/crates/voicevox_core_java_api/src/info.rs index 71b8db228..7328d4de8 100644 --- a/crates/voicevox_core_java_api/src/info.rs +++ b/crates/voicevox_core_java_api/src/info.rs @@ -9,14 +9,3 @@ extern "system" fn Java_jp_hiroshiba_voicevoxcore_GlobalInfo_rsGetVersion( Ok(version.into_raw()) }) } -#[no_mangle] -extern "system" fn 
Java_jp_hiroshiba_voicevoxcore_GlobalInfo_rsGetSupportedDevicesJson( - env: JNIEnv<'_>, -) -> jobject { - throw_if_err(env, std::ptr::null_mut(), |env| { - let supported_devices = voicevox_core::SupportedDevices::create()?; - let json = serde_json::to_string(&supported_devices).expect("Should not fail"); - let json = env.new_string(json)?; - Ok(json.into_raw()) - }) -} diff --git a/crates/voicevox_core_java_api/src/lib.rs b/crates/voicevox_core_java_api/src/lib.rs index 9615f0a94..4d61414e8 100644 --- a/crates/voicevox_core_java_api/src/lib.rs +++ b/crates/voicevox_core_java_api/src/lib.rs @@ -1,6 +1,7 @@ mod common; mod info; mod logger; +mod onnxruntime; mod open_jtalk; mod synthesizer; mod user_dict; diff --git a/crates/voicevox_core_java_api/src/onnxruntime.rs b/crates/voicevox_core_java_api/src/onnxruntime.rs new file mode 100644 index 000000000..004ff4d97 --- /dev/null +++ b/crates/voicevox_core_java_api/src/onnxruntime.rs @@ -0,0 +1,56 @@ +use std::ptr; + +use duplicate::duplicate_item; +use jni::{ + objects::{JObject, JString}, + sys::jobject, + JNIEnv, +}; + +use crate::common::throw_if_err; + +#[duplicate_item( + f CONST; + [ Java_jp_hiroshiba_voicevoxcore_Onnxruntime_rsLibName ] [ LIB_NAME ]; + [ Java_jp_hiroshiba_voicevoxcore_Onnxruntime_rsLibVersion ] [ LIB_VERSION ]; + [ Java_jp_hiroshiba_voicevoxcore_Onnxruntime_rsLibVersionedFilename ] [ LIB_VERSIONED_FILENAME ]; + [ Java_jp_hiroshiba_voicevoxcore_Onnxruntime_rsLibUnversionedFilename ] [ LIB_UNVERSIONED_FILENAME ]; +)] +#[no_mangle] +extern "system" fn f(env: JNIEnv<'_>) -> jobject { + throw_if_err(env, ptr::null_mut(), |env| { + let s = env.new_string(voicevox_core::blocking::Onnxruntime::CONST)?; + Ok(s.into_raw()) + }) +} + +#[no_mangle] +unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Onnxruntime_rsNew<'local>( + env: JNIEnv<'local>, + this: JObject<'local>, + filename: JString<'local>, +) { + throw_if_err(env, (), |env| { + let filename = String::from(env.get_string(&filename)?); + let 
internal = voicevox_core::blocking::Onnxruntime::load_once() + .filename(filename) + .exec()?; + env.set_rust_field(&this, "handle", internal)?; + Ok(()) + }) +} + +#[no_mangle] +unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Onnxruntime_rsSupportedDevices<'local>( + env: JNIEnv<'local>, + this: JObject<'local>, +) -> jobject { + throw_if_err(env, ptr::null_mut(), |env| { + let this = *env.get_rust_field::<_, _, &'static voicevox_core::blocking::Onnxruntime>( + &this, "handle", + )?; + let json = this.supported_devices()?.to_json().to_string(); + let json = env.new_string(json)?; + Ok(json.into_raw()) + }) +} diff --git a/crates/voicevox_core_java_api/src/synthesizer.rs b/crates/voicevox_core_java_api/src/synthesizer.rs index dc5dc971d..b2605e8ae 100644 --- a/crates/voicevox_core_java_api/src/synthesizer.rs +++ b/crates/voicevox_core_java_api/src/synthesizer.rs @@ -14,6 +14,7 @@ use std::sync::Arc; unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsNew<'local>( env: JNIEnv<'local>, this: JObject<'local>, + onnxruntime: JObject<'local>, open_jtalk: JObject<'local>, builder: JObject<'local>, ) { @@ -45,11 +46,18 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsNew<'loca let cpu_num_threads = env.get_field(&builder, "cpuNumThreads", "I")?; options.cpu_num_threads = cpu_num_threads.i().expect("cpuNumThreads is not integer") as u16; + let onnxruntime = *env + .get_rust_field::<_, _, &'static voicevox_core::blocking::Onnxruntime>( + &onnxruntime, + "handle", + )?; let open_jtalk = env .get_rust_field::<_, _, voicevox_core::blocking::OpenJtalk>(&open_jtalk, "handle")? 
.clone(); let internal = Arc::new(voicevox_core::blocking::Synthesizer::new( - open_jtalk, &options, + onnxruntime, + open_jtalk, + &options, )?); env.set_rust_field(&this, "handle", internal)?; Ok(()) diff --git a/crates/voicevox_core_python_api/Cargo.toml b/crates/voicevox_core_python_api/Cargo.toml index 5ccd1dc41..48c92dfb4 100644 --- a/crates/voicevox_core_python_api/Cargo.toml +++ b/crates/voicevox_core_python_api/Cargo.toml @@ -15,6 +15,7 @@ directml = ["voicevox_core/directml"] camino.workspace = true easy-ext.workspace = true log.workspace = true +once_cell.workspace = true pyo3 = { workspace = true, features = ["abi3-py38", "extension-module"] } pyo3-asyncio = { workspace = true, features = ["tokio-runtime"] } pyo3-log.workspace = true @@ -22,7 +23,7 @@ serde = { workspace = true, features = ["derive"] } serde_json.workspace = true tracing = { workspace = true, features = ["log"] } uuid.workspace = true -voicevox_core.workspace = true +voicevox_core = { workspace = true, features = ["load-onnxruntime"] } [lints.rust] unsafe_code = "forbid" diff --git a/crates/voicevox_core_python_api/python/test/conftest.py b/crates/voicevox_core_python_api/python/test/conftest.py index eec642cb9..430e415c1 100644 --- a/crates/voicevox_core_python_api/python/test/conftest.py +++ b/crates/voicevox_core_python_api/python/test/conftest.py @@ -4,9 +4,17 @@ from typing import List, TypedDict import pytest +import voicevox_core root_dir = Path(os.path.dirname(os.path.abspath(__file__))) +onnxruntime_filename = str( + root_dir.parent.parent.parent + / "test_util" + / "data" + / "lib" + / voicevox_core.blocking.Onnxruntime.LIB_VERSIONED_FILENAME +) open_jtalk_dic_dir = ( root_dir.parent.parent.parent / "test_util" / "data" / "open_jtalk_dic_utf_8-1.11" ) diff --git a/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py b/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py index ec69032b1..aea4af999 100644 --- 
a/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py +++ b/crates/voicevox_core_python_api/python/test/test_asyncio_metas.py @@ -7,7 +7,7 @@ import conftest import pytest import pytest_asyncio -from voicevox_core.asyncio import OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel def test_voice_model_metas_works(voice_model: VoiceModel) -> None: @@ -16,7 +16,10 @@ def test_voice_model_metas_works(voice_model: VoiceModel) -> None: @pytest.mark.asyncio async def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: - synthesizer = Synthesizer(await OpenJtalk.new(conftest.open_jtalk_dic_dir)) + synthesizer = Synthesizer( + await Onnxruntime.load_once(filename=conftest.onnxruntime_filename), + await OpenJtalk.new(conftest.open_jtalk_dic_dir), + ) await synthesizer.load_voice_model(voice_model) _ = synthesizer.metas diff --git a/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py b/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py index c509b8c2d..d6906a6ac 100644 --- a/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py +++ b/crates/voicevox_core_python_api/python/test/test_asyncio_user_dict_load.py @@ -15,9 +15,12 @@ @pytest.mark.asyncio async def test_user_dict_load() -> None: + onnxruntime = await voicevox_core.asyncio.Onnxruntime.load_once( + filename=conftest.onnxruntime_filename + ) open_jtalk = await voicevox_core.asyncio.OpenJtalk.new(conftest.open_jtalk_dic_dir) model = await voicevox_core.asyncio.VoiceModel.from_path(conftest.model_dir) - synthesizer = voicevox_core.asyncio.Synthesizer(open_jtalk) + synthesizer = voicevox_core.asyncio.Synthesizer(onnxruntime, open_jtalk) await synthesizer.load_voice_model(model) diff --git a/crates/voicevox_core_python_api/python/test/test_blocking_metas.py b/crates/voicevox_core_python_api/python/test/test_blocking_metas.py index c305e2cdb..00eade04b 100644 --- 
a/crates/voicevox_core_python_api/python/test/test_blocking_metas.py +++ b/crates/voicevox_core_python_api/python/test/test_blocking_metas.py @@ -6,7 +6,7 @@ import conftest import pytest -from voicevox_core.blocking import OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel def test_voice_model_metas_works(voice_model: VoiceModel) -> None: @@ -14,7 +14,10 @@ def test_voice_model_metas_works(voice_model: VoiceModel) -> None: def test_synthesizer_metas_works(voice_model: VoiceModel) -> None: - synthesizer = Synthesizer(OpenJtalk(conftest.open_jtalk_dic_dir)) + synthesizer = Synthesizer( + Onnxruntime.load_once(filename=conftest.onnxruntime_filename), + OpenJtalk(conftest.open_jtalk_dic_dir), + ) synthesizer.load_voice_model(voice_model) _ = synthesizer.metas diff --git a/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py b/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py index ef94d9742..198becbe2 100644 --- a/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py +++ b/crates/voicevox_core_python_api/python/test/test_blocking_user_dict_load.py @@ -13,9 +13,12 @@ def test_user_dict_load() -> None: + onnxruntime = voicevox_core.blocking.Onnxruntime.load_once( + filename=conftest.onnxruntime_filename + ) open_jtalk = voicevox_core.blocking.OpenJtalk(conftest.open_jtalk_dic_dir) model = voicevox_core.blocking.VoiceModel.from_path(conftest.model_dir) - synthesizer = voicevox_core.blocking.Synthesizer(open_jtalk) + synthesizer = voicevox_core.blocking.Synthesizer(onnxruntime, open_jtalk) synthesizer.load_voice_model(model) diff --git a/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_asyncio_synthesizer.py b/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_asyncio_synthesizer.py index 93d92ad28..26d389477 100644 --- 
a/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_asyncio_synthesizer.py +++ b/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_asyncio_synthesizer.py @@ -7,7 +7,7 @@ import conftest import pytest import pytest_asyncio -from voicevox_core.asyncio import OpenJtalk, Synthesizer +from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer def test_enter_returns_workable_self(synthesizer: Synthesizer) -> None: @@ -38,8 +38,13 @@ def test_access_after_exit_denied(synthesizer: Synthesizer) -> None: @pytest_asyncio.fixture -async def synthesizer(open_jtalk: OpenJtalk) -> Synthesizer: - return Synthesizer(open_jtalk) +async def synthesizer(onnxruntime: Onnxruntime, open_jtalk: OpenJtalk) -> Synthesizer: + return Synthesizer(onnxruntime, open_jtalk) + + +@pytest_asyncio.fixture(scope="function") +async def onnxruntime() -> Onnxruntime: + return await Onnxruntime.load_once(filename=conftest.onnxruntime_filename) @pytest_asyncio.fixture(scope="function") diff --git a/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_blocking_synthesizer.py b/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_blocking_synthesizer.py index 3e3f5f823..dc55eafc4 100644 --- a/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_blocking_synthesizer.py +++ b/crates/voicevox_core_python_api/python/test/test_pseudo_raii_for_blocking_synthesizer.py @@ -6,7 +6,7 @@ import conftest import pytest -from voicevox_core.blocking import OpenJtalk, Synthesizer +from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer def test_enter_returns_workable_self(synthesizer: Synthesizer) -> None: @@ -37,8 +37,13 @@ def test_access_after_exit_denied(synthesizer: Synthesizer) -> None: @pytest.fixture -def synthesizer(open_jtalk: OpenJtalk) -> Synthesizer: - return Synthesizer(open_jtalk) +def synthesizer(onnxruntime: Onnxruntime, open_jtalk: OpenJtalk) -> Synthesizer: + return Synthesizer(onnxruntime, open_jtalk) + + 
+@pytest.fixture(scope="session") +def onnxruntime() -> Onnxruntime: + return Onnxruntime.load_once(filename=conftest.onnxruntime_filename) @pytest.fixture(scope="session") diff --git a/crates/voicevox_core_python_api/python/test/test_type_stub_consts.py b/crates/voicevox_core_python_api/python/test/test_type_stub_consts.py new file mode 100644 index 000000000..6a44d2771 --- /dev/null +++ b/crates/voicevox_core_python_api/python/test/test_type_stub_consts.py @@ -0,0 +1,50 @@ +"""pyiに書かれている定数の値が、本物と合致しているかをテストする。""" + +import ast +from ast import AnnAssign, ClassDef, Constant, Name +from pathlib import Path +from typing import Tuple + +import voicevox_core + + +def test() -> None: + REAL_BLOCKING = ( + voicevox_core.blocking.Onnxruntime.LIB_NAME, + voicevox_core.blocking.Onnxruntime.LIB_VERSION, + ) + REAL_ASYNCIO = ( + voicevox_core.asyncio.Onnxruntime.LIB_NAME, + voicevox_core.asyncio.Onnxruntime.LIB_VERSION, + ) + stub_blocking = extract(Path("./python/voicevox_core/_rust/blocking.pyi")) + stub_asyncio = extract(Path("./python/voicevox_core/_rust/asyncio.pyi")) + assert len({REAL_BLOCKING, REAL_ASYNCIO, stub_blocking, stub_asyncio}) == 1 + + +def extract(pyi: Path) -> Tuple[str, str]: + module = ast.parse(pyi.read_text(encoding="utf-8")) + class_def = next( + stmt + for stmt in module.body + if isinstance(stmt, ClassDef) and stmt.name == "Onnxruntime" + ) + lib_name_value = next( + stmt.value.value + for stmt in class_def.body + if isinstance(stmt, AnnAssign) + and isinstance(stmt.target, Name) + and stmt.target.id == "LIB_NAME" + and isinstance(stmt.value, Constant) + and isinstance(stmt.value.value, str) + ) + lib_version_value = next( + stmt.value.value + for stmt in class_def.body + if isinstance(stmt, AnnAssign) + and isinstance(stmt.target, Name) + and stmt.target.id == "LIB_VERSION" + and isinstance(stmt.value, Constant) + and isinstance(stmt.value.value, str) + ) + return (lib_name_value, lib_version_value) diff --git 
a/crates/voicevox_core_python_api/python/voicevox_core/__init__.py b/crates/voicevox_core_python_api/python/voicevox_core/__init__.py index 4ccbad3fe..e9a0a03c0 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/__init__.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/__init__.py @@ -19,6 +19,7 @@ GetSupportedDevicesError, GpuSupportError, InferenceFailedError, + InitInferenceRuntimeError, InvalidModelDataError, InvalidWordError, LoadUserDictError, @@ -34,7 +35,6 @@ UseUserDictError, WordNotFoundError, __version__, - supported_devices, ) from . import asyncio, blocking # noqa: F401 isort: skip @@ -50,6 +50,7 @@ "GetSupportedDevicesError", "GpuSupportError", "InferenceFailedError", + "InitInferenceRuntimeError", "InvalidModelDataError", "InvalidWordError", "LoadUserDictError", @@ -67,7 +68,6 @@ "StyleNotFoundError", "StyleVersion", "SupportedDevices", - "supported_devices", "UseUserDictError", "UserDictWord", "UserDictWordType", diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_load_dlls.py b/crates/voicevox_core_python_api/python/voicevox_core/_load_dlls.py index db8b4cdc5..0f9e3d034 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_load_dlls.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/_load_dlls.py @@ -1,3 +1,4 @@ +# TODO: voicevox_onnxruntimeになったらやめる import glob import platform from ctypes import CDLL diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi index 89a50d230..81168843f 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi @@ -1,22 +1,5 @@ -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from voicevox_core import SupportedDevices - __version__: str -def supported_devices() -> SupportedDevices: - """ - このライブラリで利用可能なデバイスの情報を取得する。 - - .. 
code-block:: - - import voicevox_core - - supported_devices = voicevox_core.supported_devices() - """ - ... - class NotLoadedOpenjtalkDictError(Exception): """open_jtalk辞書ファイルが読み込まれていない。""" @@ -27,6 +10,11 @@ class GpuSupportError(Exception): ... +class InitInferenceRuntimeError(Exception): + """推論ライブラリのロードまたは初期化ができなかった。""" + + ... + class OpenZipFileError(Exception): """ZIPファイルを開くことに失敗した。""" diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi index 468d885ee..7652a7d2c 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi @@ -9,6 +9,7 @@ if TYPE_CHECKING: AudioQuery, SpeakerMeta, StyleId, + SupportedDevices, UserDictWord, VoiceModelId, ) @@ -37,6 +38,73 @@ class VoiceModel: """メタ情報。""" ... +class Onnxruntime: + """ + ONNX Runtime。 + + シングルトンであり、インスタンスは高々一つ。 + + .. code-block:: + + ort1 = await Onnxruntime.load_once() + ort2 = Onnxruntime.get() + assert ort2 + assert ort2 is ort1 + + .. code-block:: + + ort = await voicevox_core.asyncio.Onnxruntime.load_once() + assert voicevox_core.blocking.Onnxruntime.get() + """ + + # ここの定数値が本物と合致するかどうかは、test_type_stub_consts.pyで担保する。 + + LIB_NAME: str = "onnxruntime" + """ONNX Runtimeのライブラリ名。""" + + LIB_VERSION: str = "1.17.3" + """推奨されるONNX Runtimeのバージョン。""" + + LIB_VERSIONED_FILENAME: str + """ + :attr:`LIB_NAME` と :attr:`LIB_VERSION` からなる動的ライブラリのファイル名。 + + WindowsとAndroidでは :attr:`LIB_UNVERSIONED_FILENAME` と同じ。 + """ + + LIB_UNVERSIONED_FILENAME: str + """:attr:`LIB_NAME` からなる動的ライブラリのファイル名。""" + + @staticmethod + def get() -> Union["Onnxruntime", None]: + """ + インスタンスが既に作られているならそれを得る。 + + 作られていなければ ``None`` を返す。 + """ + ... 
+ @staticmethod + async def load_once(*, filename: str = LIB_VERSIONED_FILENAME) -> "Onnxruntime": + """ + ONNX Runtimeをロードして初期化する。 + + 一度成功したら、以後は引数を無視して同じインスタンスを返す。 + + Parameters + ---------- + filename + ONNX Runtimeのファイル名(モジュール名)もしくはファイルパス。 + ``dlopen``/`LoadLibraryExW + `_ + の引数に使われる。 + """ + ... + def supported_devices(self) -> SupportedDevices: + """ + このライブラリで利用可能なデバイスの情報を取得する。 + """ + ... + class OpenJtalk: """ テキスト解析器としてのOpen JTalk。 @@ -72,6 +140,8 @@ class Synthesizer: Parameters ---------- + onnxruntime + ONNX Runtime。 open_jtalk Open JTalk。 acceleration_mode @@ -82,6 +152,7 @@ class Synthesizer: def __init__( self, + onnxruntime: Onnxruntime, open_jtalk: OpenJtalk, acceleration_mode: Union[ AccelerationMode, Literal["AUTO", "CPU", "GPU"] @@ -92,6 +163,10 @@ class Synthesizer: def __enter__(self) -> "Synthesizer": ... def __exit__(self, exc_type, exc_value, traceback) -> None: ... @property + def onnxruntime(self) -> Onnxruntime: + """ONNX Runtime。""" + ... + @property def is_gpu_mode(self) -> bool: """ハードウェアアクセラレーションがGPUモードかどうか。""" ... diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi index fd09eb8cd..602ff31bc 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi @@ -9,6 +9,7 @@ if TYPE_CHECKING: AudioQuery, SpeakerMeta, StyleId, + SupportedDevices, UserDictWord, VoiceModelId, ) @@ -37,6 +38,73 @@ class VoiceModel: """メタ情報。""" ... +class Onnxruntime: + """ + ONNX Runtime。 + + シングルトンであり、インスタンスは高々一つ。 + + .. code-block:: + + ort1 = Onnxruntime.load_once() + ort2 = Onnxruntime.get() + assert ort2 + assert ort2 is ort1 + + .. 
code-block:: + + ort = voicevox_core.blocking.Onnxruntime.load_once() + assert voicevox_core.asyncio.Onnxruntime.get() + """ + + # ここの定数値が本物と合致するかどうかは、test_type_stub_consts.pyで担保する。 + + LIB_NAME: str = "onnxruntime" + """ONNX Runtimeのライブラリ名。""" + + LIB_VERSION: str = "1.17.3" + """推奨されるONNX Runtimeのバージョン。""" + + LIB_VERSIONED_FILENAME: str + """ + :attr:`LIB_NAME` と :attr:`LIB_VERSION` からなる動的ライブラリのファイル名。 + + WindowsとAndroidでは :attr:`LIB_UNVERSIONED_FILENAME` と同じ。 + """ + + LIB_UNVERSIONED_FILENAME: str + """:attr:`LIB_NAME` からなる動的ライブラリのファイル名。""" + + @staticmethod + def get() -> Union["Onnxruntime", None]: + """ + インスタンスが既に作られているならそれを得る。 + + 作られていなければ ``None`` を返す。 + """ + ... + @staticmethod + def load_once(*, filename: str = LIB_VERSIONED_FILENAME) -> "Onnxruntime": + """ + ONNX Runtimeをロードして初期化する。 + + 一度成功したら、以後は引数を無視して同じインスタンスを返す。 + + Parameters + ---------- + filename + ONNX Runtimeのファイル名(モジュール名)もしくはファイルパス。 + ``dlopen``/`LoadLibraryExW + `_ + の引数に使われる。 + """ + ... + def supported_devices(self) -> SupportedDevices: + """ + このライブラリで利用可能なデバイスの情報を取得する。 + """ + ... + class OpenJtalk: """ テキスト解析器としてのOpen JTalk。 @@ -67,6 +135,8 @@ class Synthesizer: Parameters ---------- + onnxruntime + ONNX Runtime。 open_jtalk Open JTalk。 acceleration_mode @@ -77,6 +147,7 @@ class Synthesizer: def __init__( self, + onnxruntime: Onnxruntime, open_jtalk: OpenJtalk, acceleration_mode: Union[ AccelerationMode, Literal["AUTO", "CPU", "GPU"] @@ -87,6 +158,10 @@ class Synthesizer: def __enter__(self) -> "Synthesizer": ... def __exit__(self, exc_type, exc_value, traceback) -> None: ... @property + def onnxruntime(self) -> Onnxruntime: + """ONNX Runtime。""" + ... + @property def is_gpu_mode(self) -> bool: """ハードウェアアクセラレーションがGPUモードかどうか。""" ... 
diff --git a/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py b/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py index 75b160814..2cff19cdf 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/asyncio.py @@ -1,4 +1,4 @@ # pyright: reportMissingModuleSource=false -from ._rust.asyncio import OpenJtalk, Synthesizer, UserDict, VoiceModel +from ._rust.asyncio import Onnxruntime, OpenJtalk, Synthesizer, UserDict, VoiceModel -__all__ = ["OpenJtalk", "Synthesizer", "UserDict", "VoiceModel"] +__all__ = ["Onnxruntime", "OpenJtalk", "Synthesizer", "UserDict", "VoiceModel"] diff --git a/crates/voicevox_core_python_api/python/voicevox_core/blocking.py b/crates/voicevox_core_python_api/python/voicevox_core/blocking.py index 80f61fdcb..7fed5fac7 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/blocking.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/blocking.py @@ -1,4 +1,4 @@ # pyright: reportMissingModuleSource=false -from ._rust.blocking import OpenJtalk, Synthesizer, UserDict, VoiceModel +from ._rust.blocking import Onnxruntime, OpenJtalk, Synthesizer, UserDict, VoiceModel -__all__ = ["OpenJtalk", "Synthesizer", "UserDict", "VoiceModel"] +__all__ = ["Onnxruntime", "OpenJtalk", "Synthesizer", "UserDict", "VoiceModel"] diff --git a/crates/voicevox_core_python_api/src/convert.rs b/crates/voicevox_core_python_api/src/convert.rs index f40b2d449..0eee14a5b 100644 --- a/crates/voicevox_core_python_api/src/convert.rs +++ b/crates/voicevox_core_python_api/src/convert.rs @@ -16,10 +16,10 @@ use voicevox_core::{ use crate::{ ExtractFullContextLabelError, GetSupportedDevicesError, GpuSupportError, InferenceFailedError, - InvalidModelDataError, InvalidModelFormatError, InvalidWordError, LoadUserDictError, - ModelAlreadyLoadedError, ModelNotFoundError, NotLoadedOpenjtalkDictError, OpenZipFileError, - ParseKanaError, ReadZipEntryError, 
SaveUserDictError, StyleAlreadyLoadedError, - StyleNotFoundError, UseUserDictError, WordNotFoundError, + InitInferenceRuntimeError, InvalidModelDataError, InvalidModelFormatError, InvalidWordError, + LoadUserDictError, ModelAlreadyLoadedError, ModelNotFoundError, NotLoadedOpenjtalkDictError, + OpenZipFileError, ParseKanaError, ReadZipEntryError, SaveUserDictError, + StyleAlreadyLoadedError, StyleNotFoundError, UseUserDictError, WordNotFoundError, }; pub(crate) fn from_acceleration_mode(ob: &PyAny) -> PyResult { @@ -191,6 +191,7 @@ pub(crate) impl voicevox_core::Result { let top = match err.kind() { ErrorKind::NotLoadedOpenjtalkDict => NotLoadedOpenjtalkDictError::new_err(msg), ErrorKind::GpuSupport => GpuSupportError::new_err(msg), + ErrorKind::InitInferenceRuntime => InitInferenceRuntimeError::new_err(msg), ErrorKind::OpenZipFile => OpenZipFileError::new_err(msg), ErrorKind::ReadZipEntry => ReadZipEntryError::new_err(msg), ErrorKind::ModelAlreadyLoaded => ModelAlreadyLoadedError::new_err(msg), diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs index e43fa4476..0da82f9ca 100644 --- a/crates/voicevox_core_python_api/src/lib.rs +++ b/crates/voicevox_core_python_api/src/lib.rs @@ -1,7 +1,7 @@ use std::marker::PhantomData; mod convert; -use self::convert::{from_utf8_path, to_pydantic_dataclass, VoicevoxCoreResultExt as _}; +use self::convert::{from_utf8_path, VoicevoxCoreResultExt as _}; use easy_ext::ext; use log::debug; use pyo3::{ @@ -9,7 +9,7 @@ use pyo3::{ exceptions::{PyException, PyKeyError, PyValueError}, pyfunction, pymodule, types::PyModule, - wrap_pyfunction, PyAny, PyResult, PyTypeInfo, Python, + wrap_pyfunction, PyResult, PyTypeInfo, Python, }; #[pymodule] @@ -18,7 +18,6 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> { pyo3_log::init(); module.add("__version__", env!("CARGO_PKG_VERSION"))?; - module.add_wrapped(wrap_pyfunction!(supported_devices))?; 
module.add_wrapped(wrap_pyfunction!(_validate_pronunciation))?; module.add_wrapped(wrap_pyfunction!(_to_zenkaku))?; @@ -26,6 +25,7 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> { let blocking_module = PyModule::new(py, "voicevox_core._rust.blocking")?; blocking_module.add_class::()?; + blocking_module.add_class::()?; blocking_module.add_class::()?; blocking_module.add_class::()?; blocking_module.add_class::()?; @@ -33,6 +33,7 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> { let asyncio_module = PyModule::new(py, "voicevox_core._rust.asyncio")?; asyncio_module.add_class::()?; + asyncio_module.add_class::()?; asyncio_module.add_class::()?; asyncio_module.add_class::()?; asyncio_module.add_class::()?; @@ -67,6 +68,7 @@ macro_rules! exceptions { exceptions! { NotLoadedOpenjtalkDictError: PyException; GpuSupportError: PyException; + InitInferenceRuntimeError: PyException; OpenZipFileError: PyException; ReadZipEntryError: PyException; ModelAlreadyLoadedError: PyException; @@ -86,16 +88,6 @@ exceptions! { InvalidWordError: PyValueError; } -#[pyfunction] -fn supported_devices(py: Python<'_>) -> PyResult<&PyAny> { - let class = py - .import("voicevox_core")? - .getattr("SupportedDevices")? 
- .downcast()?; - let s = voicevox_core::SupportedDevices::create().into_py_result(py)?; - to_pydantic_dataclass(s, class) -} - struct Closable { content: MaybeClosed, marker: PhantomData, @@ -149,13 +141,13 @@ fn _to_zenkaku(text: &str) -> PyResult { } mod blocking { - use std::{path::PathBuf, sync::Arc}; + use std::{ffi::OsString, path::PathBuf, sync::Arc}; use camino::Utf8PathBuf; use pyo3::{ pyclass, pymethods, types::{IntoPyDict as _, PyBytes, PyDict, PyList}, - PyAny, PyObject, PyRef, PyResult, Python, + Py, PyAny, PyObject, PyRef, PyResult, Python, }; use uuid::Uuid; use voicevox_core::{ @@ -191,6 +183,70 @@ mod blocking { } } + static ONNXRUNTIME: once_cell::sync::OnceCell> = + once_cell::sync::OnceCell::new(); + + #[pyclass] + #[derive(Clone)] + pub(crate) struct Onnxruntime(&'static voicevox_core::blocking::Onnxruntime); + + #[pymethods] + impl Onnxruntime { + #[classattr] + const LIB_NAME: &'static str = voicevox_core::blocking::Onnxruntime::LIB_NAME; + + #[classattr] + const LIB_VERSION: &'static str = voicevox_core::blocking::Onnxruntime::LIB_VERSION; + + #[classattr] + const LIB_VERSIONED_FILENAME: &'static str = + voicevox_core::blocking::Onnxruntime::LIB_VERSIONED_FILENAME; + + #[classattr] + const LIB_UNVERSIONED_FILENAME: &'static str = + voicevox_core::blocking::Onnxruntime::LIB_UNVERSIONED_FILENAME; + + #[staticmethod] + fn get(py: Python<'_>) -> PyResult>> { + let result = ONNXRUNTIME.get_or_try_init(|| { + match voicevox_core::blocking::Onnxruntime::get().map(|o| Py::new(py, Self(o))) { + Some(Ok(this)) => Ok(this), + Some(Err(err)) => Err(Some(err)), + None => Err(None), + } + }); + + match result { + Ok(this) => Ok(Some(this.clone())), + Err(Some(err)) => Err(err), + Err(None) => Ok(None), + } + } + + #[staticmethod] + #[pyo3(signature = (*, filename = Self::LIB_VERSIONED_FILENAME.into()))] + fn load_once(filename: OsString, py: Python<'_>) -> PyResult> { + ONNXRUNTIME + .get_or_try_init(|| { + let inner = 
voicevox_core::blocking::Onnxruntime::load_once() + .filename(filename) + .exec() + .into_py_result(py)?; + Py::new(py, Self(inner)) + }) + .cloned() + } + + fn supported_devices<'py>(&self, py: Python<'py>) -> PyResult<&'py PyAny> { + let class = py + .import("voicevox_core")? + .getattr("SupportedDevices")? + .downcast()?; + let s = self.0.supported_devices().into_py_result(py)?; + crate::convert::to_pydantic_dataclass(s, class) + } + } + #[pyclass] #[derive(Clone)] pub(crate) struct OpenJtalk { @@ -228,11 +284,13 @@ mod blocking { impl Synthesizer { #[new] #[pyo3(signature =( + onnxruntime, open_jtalk, acceleration_mode = InitializeOptions::default().acceleration_mode, cpu_num_threads = InitializeOptions::default().cpu_num_threads, ))] fn new( + onnxruntime: Onnxruntime, open_jtalk: OpenJtalk, #[pyo3(from_py_with = "crate::convert::from_acceleration_mode")] acceleration_mode: AccelerationMode, @@ -240,6 +298,7 @@ mod blocking { py: Python<'_>, ) -> PyResult { let inner = voicevox_core::blocking::Synthesizer::new( + onnxruntime.0, open_jtalk.open_jtalk.clone(), &InitializeOptions { acceleration_mode, @@ -270,6 +329,11 @@ mod blocking { self.close(); } + #[getter] + fn onnxruntime(&self) -> Py { + ONNXRUNTIME.get().expect("should be initialized").clone() + } + #[getter] fn is_gpu_mode(&self) -> PyResult { let synthesizer = self.synthesizer.get()?; @@ -577,13 +641,13 @@ mod blocking { } mod asyncio { - use std::{path::PathBuf, sync::Arc}; + use std::{ffi::OsString, path::PathBuf, sync::Arc}; use camino::Utf8PathBuf; use pyo3::{ pyclass, pymethods, types::{IntoPyDict as _, PyBytes, PyDict, PyList}, - PyAny, PyObject, PyRef, PyResult, Python, ToPyObject as _, + Py, PyAny, PyObject, PyRef, PyResult, Python, ToPyObject as _, }; use uuid::Uuid; use voicevox_core::{ @@ -622,6 +686,71 @@ mod asyncio { } } + static ONNXRUNTIME: once_cell::sync::OnceCell> = + once_cell::sync::OnceCell::new(); + + #[pyclass] + #[derive(Clone)] + pub(crate) struct Onnxruntime(&'static 
voicevox_core::tokio::Onnxruntime); + + #[pymethods] + impl Onnxruntime { + #[classattr] + const LIB_NAME: &'static str = voicevox_core::tokio::Onnxruntime::LIB_NAME; + + #[classattr] + const LIB_VERSION: &'static str = voicevox_core::tokio::Onnxruntime::LIB_VERSION; + + #[classattr] + const LIB_VERSIONED_FILENAME: &'static str = + voicevox_core::tokio::Onnxruntime::LIB_VERSIONED_FILENAME; + + #[classattr] + const LIB_UNVERSIONED_FILENAME: &'static str = + voicevox_core::tokio::Onnxruntime::LIB_UNVERSIONED_FILENAME; + + #[staticmethod] + fn get(py: Python<'_>) -> PyResult>> { + let result = ONNXRUNTIME.get_or_try_init(|| { + match voicevox_core::tokio::Onnxruntime::get().map(|o| Py::new(py, Self(o))) { + Some(Ok(this)) => Ok(this), + Some(Err(err)) => Err(Some(err)), + None => Err(None), + } + }); + + match result { + Ok(this) => Ok(Some(this.clone())), + Err(Some(err)) => Err(err), + Err(None) => Ok(None), + } + } + + #[staticmethod] + #[pyo3(signature = (*, filename = Self::LIB_VERSIONED_FILENAME.into()))] + fn load_once(filename: OsString, py: Python<'_>) -> PyResult<&PyAny> { + pyo3_asyncio::tokio::future_into_py(py, async move { + let inner = voicevox_core::tokio::Onnxruntime::load_once() + .filename(filename) + .exec() + .await; + + ONNXRUNTIME.get_or_try_init(|| { + Python::with_gil(|py| Py::new(py, Self(inner.into_py_result(py)?))) + }) + }) + } + + fn supported_devices<'py>(&self, py: Python<'py>) -> PyResult<&'py PyAny> { + let class = py + .import("voicevox_core")? + .getattr("SupportedDevices")? 
+ .downcast()?; + let s = self.0.supported_devices().into_py_result(py)?; + crate::convert::to_pydantic_dataclass(s, class) + } + } + #[pyclass] #[derive(Clone)] pub(crate) struct OpenJtalk { @@ -664,17 +793,20 @@ mod asyncio { impl Synthesizer { #[new] #[pyo3(signature =( + onnxruntime, open_jtalk, acceleration_mode = InitializeOptions::default().acceleration_mode, cpu_num_threads = InitializeOptions::default().cpu_num_threads, ))] fn new( + onnxruntime: Onnxruntime, open_jtalk: OpenJtalk, #[pyo3(from_py_with = "crate::convert::from_acceleration_mode")] acceleration_mode: AccelerationMode, cpu_num_threads: u16, ) -> PyResult { let synthesizer = voicevox_core::tokio::Synthesizer::new( + onnxruntime.0, open_jtalk.open_jtalk.clone(), &InitializeOptions { acceleration_mode, @@ -704,6 +836,11 @@ mod asyncio { self.close(); } + #[getter] + fn onnxruntime(&self) -> Py { + ONNXRUNTIME.get().expect("should be initialized").clone() + } + #[getter] fn is_gpu_mode(&self) -> PyResult { let synthesizer = self.synthesizer.get()?; diff --git a/docs/apis/c_api/doxygen/Doxyfile b/docs/apis/c_api/doxygen/Doxyfile index c42bd60e0..c845adbd4 100644 --- a/docs/apis/c_api/doxygen/Doxyfile +++ b/docs/apis/c_api/doxygen/Doxyfile @@ -2257,7 +2257,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = VOICEVOX_LOAD_ONNXRUNTIME= VOICEVOX_LINK_ONNXRUNTIME= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2660,6 +2660,7 @@ GENERATE_LEGEND = YES DOT_CLEANUP = YES +ALIASES += availability{1}="

Availability
\1
" ALIASES += example{1}="
Example
\1
" ALIASES += examples{1}="
Examples
\1
" diff --git a/docs/feature-options.md b/docs/feature-options.md new file mode 100644 index 000000000..16487d3bf --- /dev/null +++ b/docs/feature-options.md @@ -0,0 +1,26 @@ +## ONNX Runtimeのリンク方法のオプション + +Rust API(`voicevox_core`)およびC API(`voicevox_core_c_api`)においては、ビルド時に +次のCargoフィーチャのうちどちらかを選択しなければなりません。 +詳しくは[voicevox_core/Cargo.toml](../crates/voicevox_core/Cargo.toml)のコメントを参照して +下さい。Python APIやJava APIでは`load-onnxruntime`のみに限定しています。 + +- `load-onnxruntime` +- `link-onnxruntime` + +```console +❯ cargo build --release -p voicevox_core_c_api --features load-onnxruntime +❯ sed 's:^//\(#define VOICEVOX_LOAD_ONNXRUNTIME\)$:\1:' \ + crates/voicevox_core_c_api/include/voicevox_core.h \ + > ./voicevox_core.h +``` + +```console +❯ cargo build --release -p voicevox_core_c_api --features link-onnxruntime +❯ sed 's:^//\(#define VOICEVOX_LINK_ONNXRUNTIME\)$:\1:' \ + crates/voicevox_core_c_api/include/voicevox_core.h \ + > ./voicevox_core.h +``` + +C APIのリリースでは`dlopen`の利用が厳しいiOSでのみ`link-onnxruntime`で、その他は`load-onnxruntime`で +ビルドしています。 diff --git a/docs/usage.md b/docs/usage.md index 91da1c62f..e828ae220 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -62,11 +62,11 @@ VOICEVOX コアでは`Synthesizer`に音声モデルを読み込むことでテ ```python from pprint import pprint -from voicevox_core.blocking import OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel # 1. Synthesizerの初期化 open_jtalk_dict_dir = "open_jtalk_dic_utf_8-1.11" -synthesizer = Synthesizer(OpenJtalk(open_jtalk_dict_dir)) +synthesizer = Synthesizer(Onnxruntime.load_once(), OpenJtalk(open_jtalk_dict_dir)) # 2. 音声モデルの読み込み model = VoiceModel.from_path("model/0.vvm") @@ -82,7 +82,7 @@ with open("output.wav", "wb") as f: ### 1. 
Synthesizer の初期化 -辞書などを取り扱う`OpenJtalk`のインスタンスを引数に渡して`Synthesizer`を初期化します。`Synthesizer`は音声合成だけでなく、音声モデルを複数読み込んだり、イントネーションのみを生成することもできます。 +AIエンジンの`Onnxruntime`のインスタンスと、辞書などを取り扱う`OpenJtalk`のインスタンスを引数に渡して`Synthesizer`を初期化します。`Synthesizer`は音声合成だけでなく、音声モデルを複数読み込んだり、イントネーションのみを生成することもできます。 ### 2. 音声モデルの読み込み diff --git a/example/cpp/unix/simple_tts.cpp b/example/cpp/unix/simple_tts.cpp index c7683d49a..5db24b12e 100644 --- a/example/cpp/unix/simple_tts.cpp +++ b/example/cpp/unix/simple_tts.cpp @@ -20,14 +20,21 @@ int main(int argc, char *argv[]) { std::cout << "coreの初期化中..." << std::endl; auto initialize_options = voicevox_make_default_initialize_options(); + const VoicevoxOnnxruntime* onnxruntime; + auto load_ort_options = voicevox_make_default_load_onnxruntime_options(); + auto result = voicevox_onnxruntime_load_once(load_ort_options, &onnxruntime); + if (result != VOICEVOX_RESULT_OK){ + std::cerr << voicevox_error_result_to_message(result) << std::endl; + return 1; + } OpenJtalkRc* open_jtalk; - auto result = voicevox_open_jtalk_rc_new(open_jtalk_dict_path.c_str(),&open_jtalk); + result = voicevox_open_jtalk_rc_new(open_jtalk_dict_path.c_str(),&open_jtalk); if (result != VOICEVOX_RESULT_OK){ std::cerr << voicevox_error_result_to_message(result) << std::endl; return 1; } VoicevoxSynthesizer* synthesizer; - result = voicevox_synthesizer_new(open_jtalk,initialize_options,&synthesizer); + result = voicevox_synthesizer_new(onnxruntime,open_jtalk,initialize_options,&synthesizer); if (result != VOICEVOX_RESULT_OK) { std::cerr << voicevox_error_result_to_message(result) << std::endl; return 1; diff --git a/example/cpp/windows/simple_tts/simple_tts.cpp b/example/cpp/windows/simple_tts/simple_tts.cpp index bd070505f..946ef9679 100644 --- a/example/cpp/windows/simple_tts/simple_tts.cpp +++ b/example/cpp/windows/simple_tts/simple_tts.cpp @@ -33,14 +33,21 @@ int main() { VoicevoxInitializeOptions initializeOptions = voicevox_make_default_initialize_options(); std::string dict = 
GetOpenJTalkDict(); + const VoicevoxOnnxruntime* onnxruntime; + auto load_ort_options = voicevox_make_default_load_onnxruntime_options(); + auto result = voicevox_onnxruntime_load_once(load_ort_options, &onnxruntime); + if (result != VoicevoxResultCode::VOICEVOX_RESULT_OK) { + OutErrorMessage(result); + return 0; + } OpenJtalkRc* open_jtalk; - auto result = voicevox_open_jtalk_rc_new(dict.c_str(),&open_jtalk); + result = voicevox_open_jtalk_rc_new(dict.c_str(),&open_jtalk); if (result != VoicevoxResultCode::VOICEVOX_RESULT_OK) { OutErrorMessage(result); return 0; } VoicevoxSynthesizer* synthesizer; - result = voicevox_synthesizer_new(open_jtalk,initializeOptions,&synthesizer); + result = voicevox_synthesizer_new(onnxruntime,open_jtalk,initializeOptions,&synthesizer); if (result != VoicevoxResultCode::VOICEVOX_RESULT_OK) { OutErrorMessage(result); return 0; diff --git a/example/kotlin/README.md b/example/kotlin/README.md index 3bcdafc94..7dc5b637c 100644 --- a/example/kotlin/README.md +++ b/example/kotlin/README.md @@ -47,6 +47,7 @@ Usage: voicevoxcoreexample options_list Options: --mode [AUTO] -> モード { Value should be one of [auto, cpu, gpu] } --vvm -> vvmファイルへのパス (always required) { String } + --onnxruntime [libonnxruntime.so.1.17.3] -> ONNX Runtimeのファイル名(モジュール名)もしくはファイルパス { String } --dictDir [./open_jtalk_dic_utf_8-1.11] -> Open JTalkの辞書ディレクトリ { String } --text [この音声は、ボイスボックスを使用して、出力されています。] -> 読み上げさせたい文章 { String } --out [./output.wav] -> 出力wavファイルのパス { String } @@ -56,9 +57,13 @@ Options: ## 実行例 + + + ```console -❯ ./gradlew run --args="--vvm ../../crates/test_util/data/model/sample.vvm" -Inititalizing: AUTO, ./open_jtalk_dic_utf_8-1.11 +❯ # Linuxの場合 +❯ ./gradlew run --args="--vvm ../../crates/test_util/data/model/sample.vvm --onnxruntime ../../crates/test_util/data/lib/libonnxruntime.so.1.17.3" +Inititalizing: AUTO, ../../crates/test_util/data/lib/libonnxruntime.so.1.17.3, ./open_jtalk_dic_utf_8-1.11 Loading: ../../crates/test_util/data/model/sample.vvm 
Creating an AudioQuery from the text: この音声は、ボイスボックスを使用して、出力されています。 Synthesizing... diff --git a/example/kotlin/app/src/main/kotlin/app/App.kt b/example/kotlin/app/src/main/kotlin/app/App.kt index 3e76043f4..7f2651020 100644 --- a/example/kotlin/app/src/main/kotlin/app/App.kt +++ b/example/kotlin/app/src/main/kotlin/app/App.kt @@ -15,6 +15,10 @@ fun main(args: Array) { val mode by parser.option(ArgType.Choice(), description = "モード").default(Mode.AUTO) val vvmPath by parser.option(ArgType.String, fullName = "vvm", description = "vvmファイルへのパス").required() + val onnxruntime by + parser + .option(ArgType.String, description = "ONNX Runtimeのファイル名(モジュール名)もしくはファイルパス") + .default(Onnxruntime.LIB_VERSIONED_FILENAME) val dictDir by parser .option(ArgType.String, description = "Open JTalkの辞書ディレクトリ") @@ -28,10 +32,11 @@ fun main(args: Array) { parser.parse(args) - println("Inititalizing: ${mode}, ${dictDir}") + println("Inititalizing: ${mode}, ${onnxruntime}, ${dictDir}") + val ort = Onnxruntime.loadOnce().filename(onnxruntime).exec() val openJtalk = OpenJtalk(dictDir) val synthesizer = - Synthesizer.builder(openJtalk) + Synthesizer.builder(ort, openJtalk) .accelerationMode( when (mode) { Mode.AUTO -> Synthesizer.AccelerationMode.AUTO diff --git a/example/python/run-asyncio.py b/example/python/run-asyncio.py index 70d204a92..b75509183 100644 --- a/example/python/run-asyncio.py +++ b/example/python/run-asyncio.py @@ -8,9 +8,8 @@ from pathlib import Path from typing import Tuple -import voicevox_core from voicevox_core import AccelerationMode, AudioQuery -from voicevox_core.asyncio import OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModel async def main() -> None: @@ -23,17 +22,23 @@ async def main() -> None: ( acceleration_mode, vvm_path, + onnxruntime_filename, open_jtalk_dict_dir, text, out, style_id, ) = parse_args() - logger.debug("%s", f"{voicevox_core.supported_devices()=}") + logger.info("%s", f"Loading 
ONNX Runtime ({onnxruntime_filename=})") + onnxruntime = await Onnxruntime.load_once(filename=onnxruntime_filename) + + logger.debug("%s", f"{onnxruntime.supported_devices()=}") logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})") synthesizer = Synthesizer( - await OpenJtalk.new(open_jtalk_dict_dir), acceleration_mode=acceleration_mode + onnxruntime, + await OpenJtalk.new(open_jtalk_dict_dir), + acceleration_mode=acceleration_mode, ) logger.debug("%s", f"{synthesizer.metas=}") @@ -53,7 +58,7 @@ async def main() -> None: logger.info("%s", f"Wrote `{out}`") -def parse_args() -> Tuple[AccelerationMode, Path, Path, str, Path, int]: +def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]: argparser = ArgumentParser() argparser.add_argument( "--mode", @@ -66,6 +71,11 @@ def parse_args() -> Tuple[AccelerationMode, Path, Path, str, Path, int]: type=Path, help="vvmファイルへのパス", ) + argparser.add_argument( + "--onnxruntime", + default=Onnxruntime.LIB_VERSIONED_FILENAME, + help="ONNX Runtimeのライブラリのfilename", + ) argparser.add_argument( "--dict-dir", default="./open_jtalk_dic_utf_8-1.11", @@ -90,7 +100,16 @@ def parse_args() -> Tuple[AccelerationMode, Path, Path, str, Path, int]: help="話者IDを指定", ) args = argparser.parse_args() - return (args.mode, args.vvm, args.dict_dir, args.text, args.out, args.style_id) + # FIXME: 流石に多くなってきたので、`dataclass`化する + return ( + args.mode, + args.vvm, + args.onnxruntime, + args.dict_dir, + args.text, + args.out, + args.style_id, + ) def display_as_json(audio_query: AudioQuery) -> str: diff --git a/example/python/run.py b/example/python/run.py index a57139b1c..3a9fdd9e7 100644 --- a/example/python/run.py +++ b/example/python/run.py @@ -5,9 +5,8 @@ from pathlib import Path from typing import Tuple -import voicevox_core from voicevox_core import AccelerationMode, AudioQuery -from voicevox_core.blocking import OpenJtalk, Synthesizer, VoiceModel +from voicevox_core.blocking import Onnxruntime, OpenJtalk, 
Synthesizer, VoiceModel def main() -> None: @@ -20,17 +19,23 @@ def main() -> None: ( acceleration_mode, vvm_path, + onnxruntime_filename, open_jtalk_dict_dir, text, out, style_id, ) = parse_args() - logger.debug("%s", f"{voicevox_core.supported_devices()=}") + logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})") + onnxruntime = Onnxruntime.load_once(filename=onnxruntime_filename) + + logger.debug("%s", f"{onnxruntime.supported_devices()=}") logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})") synthesizer = Synthesizer( - OpenJtalk(open_jtalk_dict_dir), acceleration_mode=acceleration_mode + onnxruntime, + OpenJtalk(open_jtalk_dict_dir), + acceleration_mode=acceleration_mode, ) logger.debug("%s", f"{synthesizer.metas=}") @@ -50,7 +55,7 @@ def main() -> None: logger.info("%s", f"Wrote `{out}`") -def parse_args() -> Tuple[AccelerationMode, Path, Path, str, Path, int]: +def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]: argparser = ArgumentParser() argparser.add_argument( "--mode", @@ -63,6 +68,11 @@ def parse_args() -> Tuple[AccelerationMode, Path, Path, str, Path, int]: type=Path, help="vvmファイルへのパス", ) + argparser.add_argument( + "--onnxruntime", + default=Onnxruntime.LIB_VERSIONED_FILENAME, + help="ONNX Runtimeのライブラリのfilename", + ) argparser.add_argument( "--dict-dir", default="./open_jtalk_dic_utf_8-1.11", @@ -87,7 +97,16 @@ def parse_args() -> Tuple[AccelerationMode, Path, Path, str, Path, int]: help="話者IDを指定", ) args = argparser.parse_args() - return (args.mode, args.vvm, args.dict_dir, args.text, args.out, args.style_id) + # FIXME: 流石に多くなってきたので、`dataclass`化する + return ( + args.mode, + args.vvm, + args.onnxruntime, + args.dict_dir, + args.text, + args.out, + args.style_id, + ) def display_as_json(audio_query: AudioQuery) -> str: