diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml index 28f246220..95eefcc69 100644 --- a/.github/workflows/build_and_deploy.yml +++ b/.github/workflows/build_and_deploy.yml @@ -228,7 +228,7 @@ jobs: - name: Raplace resource if: inputs.is_production shell: bash - run: | + run: mv -f download/resource/core/README.md ./README.md - name: Install cargo-binstall uses: taiki-e/install-action@cargo-binstall @@ -298,7 +298,7 @@ jobs: cp -v target/${{ matrix.target }}/release/libvoicevox_core_java_api.so java_artifact/ || true - name: Code signing (Windows) if: startsWith(matrix.os, 'windows') && inputs.code_signing - run: | + run: bash build_util/codesign.bash "artifact/${{ env.ASSET_NAME }}/voicevox_core.dll" env: ESIGNERCKA_USERNAME: ${{ secrets.ESIGNERCKA_USERNAME }} @@ -344,44 +344,34 @@ jobs: needs: [config, build_and_deploy] runs-on: macos-12 env: + IOS_X86_64_PATH: artifact/voicevox_core-x86_64-apple-ios + IOS_AARCH64_SIM_PATH: artifact/voicevox_core-aarch64-apple-ios-sim + IOS_AARCH64_PATH: artifact/voicevox_core-aarch64-apple-ios ASSET_NAME: voicevox_core-ios-xcframework-cpu-${{ needs.config.outputs.version }} steps: - uses: actions/checkout@v3 - uses: actions/download-artifact@v2 with: name: voicevox_core-x86_64-apple-ios - path: artifact/voicevox_core-x86_64-apple-ios + path: ${{ env.IOS_X86_64_PATH }} - uses: actions/download-artifact@v2 with: name: voicevox_core-aarch64-apple-ios-sim - path: artifact/voicevox_core-aarch64-apple-ios-sim + path: ${{ env.IOS_AARCH64_SIM_PATH }} - uses: actions/download-artifact@v2 with: name: voicevox_core-aarch64-apple-ios - path: artifact/voicevox_core-aarch64-apple-ios - - name: Create fat binary + path: ${{ env.IOS_AARCH64_PATH }} + - name: Create xcframework + id: create-xcframework run: | - mkdir -p "artifact/voicevox_core-sim" - lipo -create "artifact/voicevox_core-x86_64-apple-ios/libvoicevox_core.dylib" "artifact/voicevox_core-aarch64-apple-ios-sim/libvoicevox_core.dylib" -output "artifact/voicevox_core-sim/libvoicevox_core.dylib" - - name: Create XCFramework - run: | - mkdir -p "artifact/${{ env.ASSET_NAME }}" - # 必要なファイルだけコピー - mkdir -p "Headers-sim" - cp -v artifact/voicevox_core-x86_64-apple-ios/voicevox_core.h "Headers-sim" - cp -v crates/voicevox_core_c_api/xcframework/Headers/module.modulemap "Headers-sim" - mkdir -p "Headers-aarch64" - cp -v artifact/voicevox_core-aarch64-apple-ios/voicevox_core.h "Headers-aarch64" - cp -v crates/voicevox_core_c_api/xcframework/Headers/module.modulemap "Headers-aarch64" - xcodebuild -create-xcframework \ - -library "artifact/voicevox_core-sim/libvoicevox_core.dylib" \ - -headers "Headers-sim" \ - -library "artifact/voicevox_core-aarch64-apple-ios/libvoicevox_core.dylib" \ - -headers "Headers-aarch64" \ - -output "artifact/${{ env.ASSET_NAME }}/voicevox_core.xcframework" + build_util/make_ios_xcframework.bash + echo "output_asset_path=${OUTPUT_ASSET_PATH}" >> "$GITHUB_OUTPUT" + env: + OUTPUT_ASSET_PATH: artifact/voicevox_core-ios-xcframework-cpu - name: Archive artifact run: | - cd artifact/${{ env.ASSET_NAME }} + cd ${{ steps.create-xcframework.outputs.output_asset_path }} 7z a "../../${{ env.ASSET_NAME }}.zip" "voicevox_core.xcframework" - name: Upload to Release if: fromJson(needs.config.outputs.deploy) @@ -410,7 +400,7 @@ jobs: - name: Raplace resource if: inputs.is_production shell: bash - run: | + run: rm -r ./model; mv download/fat_resource/core/model ./model - name: Create artifact run: | @@ -452,7 +442,7 @@ jobs: - name: Install cargo-edit run: cargo binstall cargo-edit@^0.11 --no-confirm - name: set cargo version - run: | + run: cargo set-version "$VERSION" -p voicevox_core_java_api - name: "Download artifact (android-arm64-cpu)" diff --git a/.github/workflows/cargo-deny.yml b/.github/workflows/cargo-deny.yml index ac4978b9a..cf600f535 100644 --- a/.github/workflows/cargo-deny.yml +++ b/.github/workflows/cargo-deny.yml @@ -13,7 +13,7 @@ jobs: - name: Install cargo-binstall uses: taiki-e/install-action@cargo-binstall - name: Install cargo-deny - run: cargo binstall cargo-deny@^0.13 --no-confirm --log-level debug + run: cargo binstall cargo-deny@^0.14 --no-confirm --log-level debug - name: cargo-deny run: | if ${{ !!github.event.release }}; then diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index aebeb0318..b409ea567 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -151,7 +151,8 @@ jobs: metadata=$(cargo metadata --format-version 1) version=$( jq -r ' - (.workspace_members[] | select(startswith("xtask "))) as $xtask + .workspace_members as $workspace_members + | (.packages[] | select(.name == "xtask").id | select(. as $id | $workspace_members | index($id))) as $xtask | (.resolve.nodes[] | select(.id == $xtask).deps[] | select(.name == "cbindgen").pkg) as $cbindgen | .packages[] | select(.id == $cbindgen).version ' <<< "$metadata" diff --git a/Cargo.lock b/Cargo.lock index 824a492c3..9e2929033 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,19 +219,15 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.3.15" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a" +checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" dependencies = [ - "bzip2", "flate2", "futures-core", + "futures-io", "memchr", "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", ] [[package]] @@ -242,8 +238,8 @@ checksum = "871f9bb5e0a22eeb7e8cf16641feb87c9dc67032ccf8ff49e772eb9941d3a965" dependencies = [ "async-task", "concurrent-queue", - "fastrand", - "futures-lite", + "fastrand 1.8.0", + "futures-lite 1.12.0", "once_cell", "slab", ] @@ -259,7 +255,7 @@ dependencies = [ "async-io", "async-lock", "blocking", - "futures-lite", + "futures-lite 1.12.0", "once_cell", ] @@ -271,7 +267,7 @@ checksum = "83e21f3a490c72b3b0cf44962180e60045de2925d8dff97918f7ee43c8f637c7" dependencies = [ "autocfg", "concurrent-queue", - "futures-lite", + "futures-lite 1.12.0", "libc", "log", "once_cell", @@ -307,7 +303,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", - "futures-lite", + "futures-lite 1.12.0", "gloo-timers", "kv-log-macro", "log", @@ -338,17 +334,15 @@ dependencies = [ [[package]] name = "async_zip" -version = "0.0.11" +version = "0.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c50d29ab7e2f9e808cca1a69ea56a36f4ff216f54a41a23aae1fd4afc05cc020" +checksum = "527207465fb6dcafbf661b0d4a51d0d2306c9d0c2975423079a6caa807930daf" dependencies = [ "async-compression", - "chrono", "crc32fast", - "log", + "futures-lite 2.2.0", "pin-project", "thiserror", - "tokio", ] [[package]] @@ -479,8 +473,8 @@ dependencies = [ "async-channel", "async-task", "atomic-waker", - "fastrand", - "futures-lite", + "fastrand 1.8.0", + "futures-lite 1.12.0", "once_cell", ] @@ -1094,7 +1088,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1105,7 +1099,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1292,7 +1286,7 @@ checksum = "48c69b3965971f5d0ea6a6dd26b55cdd517ae0e1425dc8d94e482a5915bd7ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1305,7 +1299,7 @@ dependencies = [ "num-traits", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1390,6 +1384,12 @@ dependencies = [ "instant", ] +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + [[package]] name = "filetime" version = "0.2.17" @@ -1502,7 +1502,7 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48" dependencies = [ - "fastrand", + "fastrand 1.8.0", "futures-core", "futures-io", "memchr", @@ -1511,6 +1511,19 @@ dependencies = [ "waker-fn", ] +[[package]] +name = "futures-lite" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445ba825b27408685aaecefd65178908c36c6e96aaf6d8599419d46e624192ba" +dependencies = [ + "fastrand 2.0.1", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.28" @@ -1519,7 +1532,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1788,7 +1801,7 @@ dependencies = [ "async-std", "base64 0.13.0", "cookie", - "futures-lite", + "futures-lite 1.12.0", "infer", "pin-project-lite", "rand 0.7.3", @@ -1934,15 +1947,15 @@ checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" dependencies = [ "console", "number_prefix", - "portable-atomic", + "portable-atomic 0.3.19", "unicode-width", ] [[package]] name = "indoc" -version = "1.0.7" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adab1eaa3408fb7f0c777a73e7465fd5656136fc93b670eb6df3c88c2c1344e3" +checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" [[package]] name = "infer" @@ -2009,7 +2022,7 @@ dependencies = [ "curl", "curl-sys", "flume", - "futures-lite", + "futures-lite 1.12.0", "http", "log", "once_cell", @@ -2045,6 +2058,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" +[[package]] +name = "jlabel" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f040b22c55628977296069dbf8635be49cc510999c048a1f1bdb56d00983148" +dependencies = [ + "thiserror", +] + [[package]] name = "jni" version = "0.21.1" @@ -2216,17 +2238,6 @@ dependencies = [ "value-bag", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "matchers" version = "0.1.0" @@ -2591,7 +2602,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -2617,9 +2628,9 @@ checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" [[package]] name = "parking" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" +checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" [[package]] name = "parking_lot" @@ -2667,7 +2678,7 @@ dependencies = [ "regex", "regex-syntax 0.7.5", "structmeta", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -2783,6 +2794,12 @@ version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + [[package]] name = "ppv-lite86" version = "0.2.16" @@ -2862,9 +2879,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.69" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -2880,15 +2897,16 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38" +checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" dependencies = [ "cfg-if", "indoc", "libc", "memoffset 0.9.0", "parking_lot", + "portable-atomic 1.6.0", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", @@ -2897,9 +2915,9 @@ dependencies = [ [[package]] name = "pyo3-asyncio" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2cc34c1f907ca090d7add03dc523acdd91f3a4dab12286604951e2f5152edad" +checksum = "6ea6b68e93db3622f3bb3bf363246cf948ed5375afe7abff98ccbdd50b184995" dependencies = [ "futures", "once_cell", @@ -2910,9 +2928,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" +checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" dependencies = [ "once_cell", "target-lexicon", @@ -2920,9 +2938,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" +checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" dependencies = [ "libc", "pyo3-build-config", @@ -2941,32 +2959,34 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1" +checksum = "7305c720fa01b8055ec95e484a6eca7a83c841267f0dd5280f0c8b8551d2c158" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 1.0.102", + "syn 2.0.48", ] [[package]] name = "pyo3-macros-backend" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" +checksum = "7c7e9b68bb9c3149c5b0cade5d07f953d6d125eb4337723c4ccdb665f1f96185" dependencies = [ + "heck", "proc-macro2", + "pyo3-build-config", "quote", - "syn 1.0.102", + "syn 2.0.48", ] [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -3218,6 +3238,18 @@ dependencies = [ "syn 1.0.102", ] +[[package]] +name = "rstest_reuse" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88530b681abe67924d42cca181d070e3ac20e0740569441a9e35a7cedd2b34a4" +dependencies = [ + "quote", + "rand 0.8.5", + "rustc_version 0.4.0", + "syn 2.0.48", +] + [[package]] name = "rustc-demangle" version = "0.1.21" @@ -3397,7 +3429,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -3479,7 +3511,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -3591,9 +3623,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.10.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "snafu" @@ -3722,7 +3754,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -3733,7 +3765,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -3800,9 +3832,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.38" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -3834,7 +3866,7 @@ checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" dependencies = [ "autocfg", "cfg-if", - "fastrand", + "fastrand 1.8.0", "redox_syscall 0.3.5", "rustix 0.37.19", "windows-sys 0.48.0", @@ -3864,6 +3896,7 @@ dependencies = [ "async_zip", "flate2", "fs-err", + "futures-lite 2.2.0", "once_cell", "serde", "serde_json", @@ -3880,22 +3913,22 @@ checksum = "949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 1.0.102", + "syn 2.0.48", ] [[package]] @@ -4013,7 +4046,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -4247,9 +4280,9 @@ checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "unindent" -version = "0.1.10" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58ee9362deb4a96cef4d437d1ad49cffc9b9e92d202b6995674e928ce684f112" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "universal-hash" @@ -4383,6 +4416,7 @@ dependencies = [ "humansize", "indexmap 2.0.0", "itertools 0.10.5", + "jlabel", "nanoid", "ndarray", "once_cell", @@ -4392,8 +4426,10 @@ dependencies = [ "rayon", "regex", "rstest", + "rstest_reuse", "serde", "serde_json", + "smallvec", "tempfile", "test_util", "thiserror", @@ -4471,7 +4507,7 @@ dependencies = [ "indexmap 2.0.0", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -4918,15 +4954,6 @@ dependencies = [ "fs-err", ] -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yansi" version = "0.5.1" @@ -4980,11 +5007,10 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.5+zstd.1.5.2" +version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc50ffce891ad571e9f9afe5039c4837bede781ac4bb13052ed7ae695518596" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", - "libc", "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index e4e315514..2ed1092e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ anstyle-query = "1.0.0" anyhow = "1.0.65" assert_cmd = "2.0.8" async-std = "1.12.0" -async_zip = "0.0.11" +async_zip = "=0.0.16" binstall-tar = "0.4.39" bytes = "1.1.0" camino = "1.1.6" @@ -33,12 +33,14 @@ fs-err = "2.9.0" futures = "0.3.26" futures-core = "0.3.25" futures-util = "0.3.25" +futures-lite = "2.2.0" heck = "0.4.1" humansize = "2.1.2" indexmap = "2.0.0" indicatif = "0.17.3" inventory = "0.3.4" itertools = "0.10.5" +jlabel = "0.1.2" jni = "0.21.1" libc = "0.2.134" libloading = "0.7.3" @@ -53,17 +55,19 @@ ouroboros = "0.18.0" parse-display = "0.8.2" pretty_assertions = "1.3.0" proc-macro2 = "1.0.69" -pyo3 = "0.19.2" -pyo3-asyncio = "0.19.0" +pyo3 = "0.20.3" +pyo3-asyncio = "0.20.0" pyo3-log = "0.9.0" quote = "1.0.33" rayon = "1.6.1" regex = "1.10.0" reqwest = { version = "0.11.13", default-features = false } rstest = "0.15.0" +rstest_reuse = "0.6.0" serde = "1.0.145" serde_json = "1.0.85" serde_with = "3.3.0" +smallvec = "1.13.1" strum = "0.24.1" surf = "2.3.2" syn = "2.0.38" diff --git a/build_util/make_ios_xcframework.bash b/build_util/make_ios_xcframework.bash new file mode 100755 index 000000000..2d35fac88 --- /dev/null +++ b/build_util/make_ios_xcframework.bash @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +set -eu + +if [ ! -v IOS_X86_64_PATH ]; then # X86_64用のモジュールのディレクトリ(simulator) + echo "IOS_X86_64_PATHが未定義です" + exit 1 +fi +if [ ! -v IOS_AARCH64_SIM_PATH ]; then # AARCH64_SIM用のモジュールのディレクトリ(simulator) + echo "IOS_AARCH64_SIM_PATHが未定義です" + exit 1 +fi +if [ ! -v IOS_AARCH64_PATH ]; then # AARCH64用のモジュールのディレクトリ(実機) + echo "IOS_AARCH64_PATHが未定義です" + exit 1 +fi +if [ ! -v OUTPUT_ASSET_PATH ]; then # 出力するASSETのディレクトリ + echo "OUTPUT_ASSET_PATHが未定義です" + exit 1 +fi + +echo "* Get original onnxruntime file name from rpath" +output=$(otool -L "${IOS_AARCH64_PATH}/libvoicevox_core.dylib") +matched_line=$(echo "$output" | grep "@rpath" | grep "libonnxruntime") +if [[ $matched_line ]]; then + if [[ $matched_line =~ (@rpath/([^ ]+\.dylib)) ]]; then + dylib_string=${BASH_REMATCH[2]} + else + echo "Expected pattern not found in the matched line" + echo "$output" + exit 1 + fi +else + echo "No line containing '@rpath' and 'libonnxruntime' found" + echo "$output" + exit 1 +fi +echo "Original onnx dylib file name: $dylib_string" + +echo "* Copy Framework template" +arches=("aarch64" "sim") +artifacts=("${IOS_AARCH64_PATH}" "${IOS_AARCH64_SIM_PATH}") +for i in "${!arches[@]}"; do + arch="${arches[$i]}" + artifact="${artifacts[$i]}" + echo "* Copy Framework-${arch} template" + mkdir -p "Framework-${arch}/voicevox_core.framework/Headers" + cp -vr "crates/voicevox_core_c_api/xcframework/Frameworks/${arch}/" "Framework-${arch}/" + cp -v "${artifact}/voicevox_core.h" \ + "Framework-${arch}/voicevox_core.framework/Headers/voicevox_core.h" +done + +echo "* Create dylib" +# aarch64はdylibをコピー +cp -v "${IOS_AARCH64_PATH}/libvoicevox_core.dylib" \ + "Framework-aarch64/voicevox_core.framework/voicevox_core" + +# simはx86_64とarrch64を合わせてdylib作成 +lipo -create "${IOS_X86_64_PATH}/libvoicevox_core.dylib" \ + "${IOS_AARCH64_SIM_PATH}/libvoicevox_core.dylib" \ + -output "Framework-sim/voicevox_core.framework/voicevox_core" + +for arch in "${arches[@]}"; do + echo "* Change ${arch} @rpath" + # 自身への@rpathを変更 + install_name_tool -id "@rpath/voicevox_core.framework/voicevox_core" \ + "Framework-${arch}/voicevox_core.framework/voicevox_core" + + # 依存ライブラリonnxruntimeへの@rpathを変更 + install_name_tool -change "@rpath/$dylib_string" \ + "@rpath/onnxruntime.framework/onnxruntime" \ + "Framework-${arch}/voicevox_core.framework/voicevox_core" +done + +echo "* Create XCFramework" +mkdir -p "${OUTPUT_ASSET_PATH}" +xcodebuild -create-xcframework \ + -framework "Framework-sim/voicevox_core.framework" \ + -framework "Framework-aarch64/voicevox_core.framework" \ + -output "${OUTPUT_ASSET_PATH}/voicevox_core.xcframework" diff --git a/crates/test_util/Cargo.toml b/crates/test_util/Cargo.toml index 6394b321a..bea6e4d4e 100644 --- a/crates/test_util/Cargo.toml +++ b/crates/test_util/Cargo.toml @@ -3,7 +3,8 @@ name = "test_util" edition.workspace = true [dependencies] -async_zip = { workspace = true, features = ["full"] } +async_zip = { workspace = true, features = ["deflate"] } +futures-lite.workspace = true once_cell.workspace = true serde = { workspace = true, features = ["derive"] } serde_json.workspace = true diff --git a/crates/test_util/src/lib.rs b/crates/test_util/src/lib.rs index 05da2a33f..03e8309b8 100644 --- a/crates/test_util/src/lib.rs +++ b/crates/test_util/src/lib.rs @@ -1,6 +1,7 @@ mod typing; -use async_zip::{write::ZipFileWriter, Compression, ZipEntryBuilder}; +use async_zip::{base::write::ZipFileWriter, Compression, ZipEntryBuilder}; +use futures_lite::AsyncWriteExt as _; use once_cell::sync::Lazy; use std::{ collections::HashMap, @@ -8,7 +9,7 @@ use std::{ }; use tokio::{ fs::{self, File}, - io::{AsyncReadExt, AsyncWriteExt}, + io::AsyncReadExt, sync::Mutex, }; @@ -21,7 +22,7 @@ pub const OPEN_JTALK_DIC_DIR: &str = concat!( "/data/open_jtalk_dic_utf_8-1.11" ); -pub const EXAMPLE_DATA_JSON: &str = include_str!(concat!( +const EXAMPLE_DATA_JSON: &str = include_str!(concat!( env!("CARGO_MANIFEST_DIR"), "/data/example_data.json" )); @@ -50,18 +51,11 @@ pub async fn convert_zip_vvm(dir: impl AsRef) -> PathBuf { fs::create_dir_all(out_file_path.parent().unwrap()) .await .unwrap(); - let mut out_file = File::create(&out_file_path).await.unwrap(); - let mut writer = ZipFileWriter::new(&mut out_file); + let mut writer = ZipFileWriter::new(vec![]); for entry in dir.read_dir().unwrap().flatten() { let entry_builder = ZipEntryBuilder::new( - entry - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .to_string(), + entry.path().file_name().unwrap().to_str().unwrap().into(), Compression::Deflate, ); let mut entry_writer = writer.write_entry_stream(entry_builder).await.unwrap(); @@ -71,7 +65,8 @@ pub async fn convert_zip_vvm(dir: impl AsRef) -> PathBuf { entry_writer.write_all(&buf).await.unwrap(); entry_writer.close().await.unwrap(); } - writer.close().await.unwrap(); + let zip = writer.close().await.unwrap(); + fs::write(&out_file_path, zip).await.unwrap(); } out_file_path } diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml index fab7e4956..ebe346823 100644 --- a/crates/voicevox_core/Cargo.toml +++ b/crates/voicevox_core/Cargo.toml @@ -11,7 +11,7 @@ directml = ["voicevox-ort/directml"] [dependencies] anyhow.workspace = true -async_zip = { workspace = true, features = ["full"] } +async_zip = { workspace = true, features = ["deflate"] } camino.workspace = true derive-getters.workspace = true derive-new.workspace = true @@ -24,6 +24,7 @@ fs-err = { workspace = true, features = ["tokio"] } futures.workspace = true indexmap = { workspace = true, features = ["serde"] } itertools.workspace = true +jlabel.workspace = true nanoid.workspace = true ndarray.workspace = true once_cell.workspace = true @@ -33,6 +34,7 @@ rayon.workspace = true regex.workspace = true serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = ["preserve_order"] } +smallvec.workspace = true tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["rt"] } # FIXME: feature-gateする @@ -46,6 +48,7 @@ zip.workspace = true heck.workspace = true pretty_assertions.workspace = true rstest.workspace = true +rstest_reuse.workspace = true test_util.workspace = true tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/voicevox_core/src/engine/acoustic_feature_extractor.rs b/crates/voicevox_core/src/engine/acoustic_feature_extractor.rs index 4a30f71a5..5ee7ea540 100644 --- a/crates/voicevox_core/src/engine/acoustic_feature_extractor.rs +++ b/crates/voicevox_core/src/engine/acoustic_feature_extractor.rs @@ -61,7 +61,7 @@ static PHONEME_MAP: Lazy> = Lazy::new(|| { }); #[derive(Debug, Clone, PartialEq, new, Default, Getters)] -pub struct OjtPhoneme { +pub(crate) struct OjtPhoneme { phoneme: String, #[allow(dead_code)] start: f32, @@ -70,15 +70,15 @@ pub struct OjtPhoneme { } impl OjtPhoneme { - pub fn num_phoneme() -> usize { + pub(crate) fn num_phoneme() -> usize { PHONEME_MAP.len() } - pub fn space_phoneme() -> String { + fn space_phoneme() -> String { "pau".into() } - pub fn phoneme_id(&self) -> i64 { + pub(crate) fn phoneme_id(&self) -> i64 { if self.phoneme.is_empty() { -1 } else { @@ -86,7 +86,7 @@ impl OjtPhoneme { } } - pub fn convert(phonemes: &[OjtPhoneme]) -> Vec { + pub(crate) fn convert(phonemes: &[OjtPhoneme]) -> Vec { let mut phonemes = phonemes.to_owned(); if let Some(first_phoneme) = phonemes.first_mut() { if first_phoneme.phoneme.contains("sil") { diff --git a/crates/voicevox_core/src/engine/full_context_label.rs b/crates/voicevox_core/src/engine/full_context_label.rs index 79928d246..06cfdd0fd 100644 --- a/crates/voicevox_core/src/engine/full_context_label.rs +++ b/crates/voicevox_core/src/engine/full_context_label.rs @@ -1,10 +1,11 @@ -use std::collections::HashMap; +use std::str::FromStr; -use crate::engine::open_jtalk::FullcontextExtractor; -use derive_getters::Getters; -use derive_new::new; -use once_cell::sync::Lazy; -use regex::Regex; +use crate::{ + engine::{self, open_jtalk::FullcontextExtractor, MoraModel}, + AccentPhraseModel, +}; +use jlabel::{Label, Mora}; +use smallvec::SmallVec; // FIXME: 入力テキストをここで持って、メッセージに含む #[derive(thiserror::Error, Debug)] @@ -20,321 +21,544 @@ enum ErrorKind { #[display(fmt = "Open JTalkで解釈することができませんでした")] OpenJtalk, - #[display(fmt = "label parse error label: {label}")] - LabelParse { label: String }, + #[display(fmt = "jlabelでラベルを解釈することができませんでした")] + Jlabel, - #[display(fmt = "too long mora mora_phonemes: {mora_phonemes:?}")] - TooLongMora { mora_phonemes: Vec }, - - #[display(fmt = "invalid mora: {mora:?}")] - InvalidMora { mora: Box }, + #[display(fmt = "too long mora")] + TooLongMora, } type Result = std::result::Result; -#[derive(new, Getters, Clone, PartialEq, Eq, Debug)] -pub struct Phoneme { - contexts: HashMap, - label: String, -} - -static P3_REGEX: Lazy = Lazy::new(|| Regex::new(r"(\-(.*?)\+)").unwrap()); -static A2_REGEX: Lazy = Lazy::new(|| Regex::new(r"(\+(\d+|xx)\+)").unwrap()); -static A3_REGEX: Lazy = Lazy::new(|| Regex::new(r"(\+(\d+|xx)/B:)").unwrap()); -static F1_REGEX: Lazy = Lazy::new(|| Regex::new(r"(/F:(\d+|xx)_)").unwrap()); -static F2_REGEX: Lazy = Lazy::new(|| Regex::new(r"(_(\d+|xx)\#)").unwrap()); -static F3_REGEX: Lazy = Lazy::new(|| Regex::new(r"(\#(\d+|xx)_)").unwrap()); -static F5_REGEX: Lazy = Lazy::new(|| Regex::new(r"(@(\d+|xx)_)").unwrap()); -static H1_REGEX: Lazy = Lazy::new(|| Regex::new(r"(/H:(\d+|xx)_)").unwrap()); -static I3_REGEX: Lazy = Lazy::new(|| Regex::new(r"(@(\d+|xx)\+)").unwrap()); -static J1_REGEX: Lazy = Lazy::new(|| Regex::new(r"(/J:(\d+|xx)_)").unwrap()); - -fn string_feature_by_regex(re: &Regex, label: &str) -> std::result::Result { - if let Some(caps) = re.captures(label) { - Ok(caps[2].to_string()) - } else { - Err(ErrorKind::LabelParse { - label: label.into(), - }) - } -} - -impl Phoneme { - fn from_label(label: impl Into) -> std::result::Result { - let mut contexts = HashMap::::with_capacity(10); - let label = label.into(); - contexts.insert("p3".into(), string_feature_by_regex(&P3_REGEX, &label)?); - contexts.insert("a2".into(), string_feature_by_regex(&A2_REGEX, &label)?); - contexts.insert("a3".into(), string_feature_by_regex(&A3_REGEX, &label)?); - contexts.insert("f1".into(), string_feature_by_regex(&F1_REGEX, &label)?); - contexts.insert("f2".into(), string_feature_by_regex(&F2_REGEX, &label)?); - contexts.insert("f3".into(), string_feature_by_regex(&F3_REGEX, &label)?); - contexts.insert("f5".into(), string_feature_by_regex(&F5_REGEX, &label)?); - contexts.insert("h1".into(), string_feature_by_regex(&H1_REGEX, &label)?); - contexts.insert("i3".into(), string_feature_by_regex(&I3_REGEX, &label)?); - contexts.insert("j1".into(), string_feature_by_regex(&J1_REGEX, &label)?); - - Ok(Self::new(contexts, label)) - } - - pub fn phoneme(&self) -> &str { - self.contexts.get("p3").unwrap().as_str() - } - - pub fn is_pause(&self) -> bool { - self.contexts.get("f1").unwrap().as_str() == "xx" - } -} - -#[derive(new, Getters, Clone, PartialEq, Eq, Debug)] -pub struct Mora { - consonant: Option, - vowel: Phoneme, +pub(crate) fn extract_full_context_label( + open_jtalk: &impl FullcontextExtractor, + text: impl AsRef, +) -> Result> { + let labels = open_jtalk + .extract_fullcontext(text.as_ref()) + .map_err(|source| FullContextLabelError { + context: ErrorKind::OpenJtalk, + source: Some(source), + })?; + + let parsed_labels = labels + .into_iter() + .map(|s| Label::from_str(&s)) + .collect::, _>>() + .map_err(|source| FullContextLabelError { + context: ErrorKind::Jlabel, + source: Some(source.into()), + })?; + + generate_accent_phrases(&parsed_labels).map_err(|context| FullContextLabelError { + context, + source: None, + }) } -impl Mora { - pub fn set_context(&mut self, key: impl Into, value: impl Into) { - let key = key.into(); - let value = value.into(); - if let Some(ref mut consonant) = self.consonant { - consonant.contexts.insert(key.clone(), value.clone()); +fn generate_accent_phrases( + utterance: &[Label], +) -> std::result::Result, ErrorKind> { + let mut accent_phrases = Vec::with_capacity( + utterance + .first() + .map(|label| label.utterance.accent_phrase_count.into()) + .unwrap_or(0), + ); + + let split = utterance.chunk_by(|a, b| { + a.breath_group_curr == b.breath_group_curr && a.accent_phrase_curr == b.accent_phrase_curr + }); + for labels in split { + let moras = generate_moras(labels)?; + if moras.is_empty() { + continue; } - self.vowel.contexts.insert(key, value); - } - pub fn phonemes(&self) -> Vec { - if self.consonant.is_some() { - vec![ - self.consonant().as_ref().unwrap().clone(), - self.vowel.clone(), - ] + let Some(Label { + accent_phrase_curr: Some(ap_curr), + breath_group_curr: Some(bg_curr), + .. + }) = labels.first() + else { + continue; + }; + + // Breath Groupの中で最後のアクセント句かつ,Utteranceの中で最後のBreath Groupでない場合は次がpauになる + let pause_mora = if ap_curr.accent_phrase_position_backward == 1 + && bg_curr.breath_group_position_backward != 1 + { + Some(MoraModel::new( + "、".into(), + None, + None, + "pau".into(), + 0., + 0., + )) } else { - vec![self.vowel.clone()] - } - } + None + }; - #[allow(dead_code)] - pub fn labels(&self) -> Vec { - self.phonemes().iter().map(|p| p.label().clone()).collect() + // workaround for VOICEVOX/voicevox_engine#55 + let accent = usize::from(ap_curr.accent_position).min(moras.len()); + + accent_phrases.push(AccentPhraseModel::new( + moras, + accent, + pause_mora, + ap_curr.is_interrogative, + )) } + Ok(accent_phrases) } -#[derive(new, Getters, Clone, Debug, PartialEq, Eq)] -pub struct AccentPhrase { - moras: Vec, - accent: usize, - is_interrogative: bool, -} - -impl AccentPhrase { - fn from_phonemes(mut phonemes: Vec) -> std::result::Result { - let mut moras = Vec::with_capacity(phonemes.len()); - let mut mora_phonemes = Vec::with_capacity(phonemes.len()); - for i in 0..phonemes.len() { - { - let phoneme = phonemes.get_mut(i).unwrap(); - if phoneme.contexts().get("a2").map(|s| s.as_str()) == Some("49") { - break; - } - mora_phonemes.push(phoneme.clone()); +fn generate_moras(accent_phrase: &[Label]) -> std::result::Result, ErrorKind> { + let mut moras = Vec::with_capacity(accent_phrase.len()); + + let split = accent_phrase.chunk_by(|a, b| a.mora == b.mora); + for labels in split { + let labels: SmallVec<[&Label; 3]> = + labels.iter().filter(|label| label.mora.is_some()).collect(); + match labels[..] { + [consonant, vowel] => { + let mora = generate_mora(Some(consonant), vowel); + moras.push(mora); } - - if i + 1 == phonemes.len() - || phonemes.get(i).unwrap().contexts().get("a2").unwrap() - != phonemes.get(i + 1).unwrap().contexts().get("a2").unwrap() - { - if mora_phonemes.len() == 1 { - moras.push(Mora::new(None, mora_phonemes[0].clone())); - } else if mora_phonemes.len() == 2 { - moras.push(Mora::new( - Some(mora_phonemes[0].clone()), - mora_phonemes[1].clone(), - )); - } else { - return Err(ErrorKind::TooLongMora { mora_phonemes }); - } - mora_phonemes.clear(); + [vowel] => { + let mora = generate_mora(None, vowel); + moras.push(mora); + } + // silやpau以外の音素がないモーラは含めない + [] => {} + + // 音素が3つ以上ある場合: + // position_forwardとposition_backwardが飽和している場合は無視する + [Label { + mora: + Some(Mora { + position_forward: 49, + position_backward: 49, + .. + }), + .. + }, ..] => {} + _ => { + return Err(ErrorKind::TooLongMora); } - } - - let mora = &moras[0]; - let mut accent: usize = mora - .vowel() - .contexts() - .get("f2") - .ok_or_else(|| ErrorKind::InvalidMora { - mora: mora.clone().into(), - })? - .parse() - .map_err(|_| ErrorKind::InvalidMora { - mora: mora.clone().into(), - })?; - - let is_interrogative = moras - .last() - .unwrap() - .vowel() - .contexts() - .get("f3") - .map(|s| s.as_str()) - == Some("1"); - // workaround for VOICEVOX/voicevox_engine#55 - if accent > moras.len() { - accent = moras.len(); - } - - Ok(Self::new(moras, accent, is_interrogative)) - } - - #[allow(dead_code)] - pub fn set_context(&mut self, key: impl Into, value: impl Into) { - let key = key.into(); - let value = value.into(); - for mora in self.moras.iter_mut() { - mora.set_context(&key, &value); } } - - pub fn phonemes(&self) -> Vec { - self.moras.iter().flat_map(|m| m.phonemes()).collect() - } - - #[allow(dead_code)] - pub fn labels(&self) -> Vec { - self.phonemes().iter().map(|p| p.label().clone()).collect() - } - - #[allow(dead_code)] - pub fn merge(&self, accent_phrase: AccentPhrase) -> AccentPhrase { - let mut moras = self.moras().clone(); - let is_interrogative = *accent_phrase.is_interrogative(); - moras.extend(accent_phrase.moras); - AccentPhrase::new(moras, *self.accent(), is_interrogative) - } + Ok(moras) } -#[derive(new, Getters, Clone, PartialEq, Eq, Debug)] -pub struct BreathGroup { - accent_phrases: Vec, +fn generate_mora(consonant: Option<&Label>, vowel: &Label) -> MoraModel { + let consonant_phoneme = consonant.and_then(|c| c.phoneme.c.to_owned()); + let vowel_phoneme = vowel.phoneme.c.as_deref().unwrap(); + MoraModel::new( + mora_to_text(consonant_phoneme.as_deref(), vowel_phoneme), + consonant_phoneme, + consonant.and(Some(0.0)), + vowel_phoneme.to_string(), + 0.0, + 0.0, + ) } -impl BreathGroup { - fn from_phonemes(phonemes: Vec) -> std::result::Result { - let mut accent_phrases = Vec::with_capacity(phonemes.len()); - let mut accent_phonemes = Vec::with_capacity(phonemes.len()); - for i in 0..phonemes.len() { - accent_phonemes.push(phonemes.get(i).unwrap().clone()); - if i + 1 == phonemes.len() - || phonemes.get(i).unwrap().contexts().get("i3").unwrap() - != phonemes.get(i + 1).unwrap().contexts().get("i3").unwrap() - || phonemes.get(i).unwrap().contexts().get("f5").unwrap() - != phonemes.get(i + 1).unwrap().contexts().get("f5").unwrap() - { - accent_phrases.push(AccentPhrase::from_phonemes(accent_phonemes.clone())?); - accent_phonemes.clear(); - } +pub fn mora_to_text(consonant: Option<&str>, vowel: &str) -> String { + let mora_text = format!( + "{}{}", + consonant.unwrap_or(""), + match vowel { + phoneme @ ("A" | "I" | "U" | "E" | "O") => phoneme.to_lowercase(), + phoneme => phoneme.to_string(), } + ); + // もしカタカナに変換できなければ、引数で与えた文字列がそのまま返ってくる + engine::mora2text(&mora_text).to_string() +} - Ok(Self::new(accent_phrases)) +// FIXME: Rust 1.77の新機能導入と共にこれを消す +#[allow(unused_imports)] +mod chunk_by { + // Implementations in this module were copied from + // [Rust](https://github.com/rust-lang/rust/blob/746a58d4359786e4aebb372a30829706fa5a968f/library/core/src/slice/iter.rs). + + // MIT License Notice + + // Permission is hereby granted, free of charge, to any + // person obtaining a copy of this software and associated + // documentation files (the "Software"), to deal in the + // Software without restriction, including without + // limitation the rights to use, copy, modify, merge, + // publish, distribute, sublicense, and/or sell copies of + // the Software, and to permit persons to whom the Software + // is furnished to do so, subject to the following + // conditions: + // + // The above copyright notice and this permission notice + // shall be included in all copies or substantial portions + // of the Software. + // + // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + // DEALINGS IN THE SOFTWARE. + + pub struct ChunkBy<'a, T, P> { + slice: &'a [T], + predicate: P, } - - #[allow(dead_code)] - pub fn set_context(&mut self, key: impl Into, value: impl Into) { - let key = key.into(); - let value = value.into(); - for accent_phrase in self.accent_phrases.iter_mut() { - accent_phrase.set_context(&key, &value); + impl<'a, T, P> ChunkBy<'a, T, P> { + pub(super) fn new(slice: &'a [T], predicate: P) -> Self { + ChunkBy { slice, predicate } } } + impl<'a, T, P> Iterator for ChunkBy<'a, T, P> + where + P: FnMut(&T, &T) -> bool, + { + type Item = &'a [T]; + + #[inline] + fn next(&mut self) -> Option { + if self.slice.is_empty() { + None + } else { + let mut len = 1; + let mut iter = self.slice.windows(2); + while let Some([l, r]) = iter.next() { + if (self.predicate)(l, r) { + len += 1 + } else { + break; + } + } + let (head, tail) = self.slice.split_at(len); + self.slice = tail; + Some(head) + } + } - pub fn phonemes(&self) -> Vec { - self.accent_phrases() - .iter() - .flat_map(|a| a.phonemes()) - .collect() + #[inline] + fn size_hint(&self) -> (usize, Option) { + if self.slice.is_empty() { + (0, Some(0)) + } else { + (1, Some(self.slice.len())) + } + } } - #[allow(dead_code)] - pub fn labels(&self) -> Vec { - self.phonemes().iter().map(|p| p.label().clone()).collect() + #[easy_ext::ext(TChunkBy)] + impl [T] { + pub fn chunk_by(&self, pred: F) -> ChunkBy<'_, T, F> + where + F: FnMut(&T, &T) -> bool, + { + ChunkBy::new(self, pred) + } } -} -#[derive(new, Getters, Clone, PartialEq, Eq, Debug)] -pub struct Utterance { - breath_groups: Vec, - pauses: Vec, + #[cfg(test)] + mod tests { + use super::TChunkBy; + + #[test] + fn chunk_by() { + let mut split = [0, 0, 1, 1, 1, -5].chunk_by(|a, b| a == b); + assert_eq!(split.next(), Some([0, 0].as_slice())); + assert_eq!(split.next(), Some([1, 1, 1].as_slice())); + assert_eq!(split.next(), Some([-5].as_slice())); + assert_eq!(split.next(), None); + } + } } -impl Utterance { - fn from_phonemes(phonemes: Vec) -> std::result::Result { - let mut breath_groups = vec![]; - let mut group_phonemes = Vec::with_capacity(phonemes.len()); - let mut pauses = vec![]; - for phoneme in phonemes.into_iter() { - if !phoneme.is_pause() { - group_phonemes.push(phoneme); - } else { - pauses.push(phoneme); - - if !group_phonemes.is_empty() { - breath_groups.push(BreathGroup::from_phonemes(group_phonemes.clone())?); - group_phonemes.clear(); - } - } - } - Ok(Self::new(breath_groups, pauses)) +#[cfg(test)] +mod tests { + use rstest_reuse::*; + + use ::test_util::OPEN_JTALK_DIC_DIR; + use rstest::rstest; + + use std::str::FromStr; + + use crate::{ + engine::{ + full_context_label::{extract_full_context_label, generate_accent_phrases}, + open_jtalk::FullcontextExtractor, + MoraModel, + }, + AccentPhraseModel, + }; + use jlabel::Label; + + fn mora(text: &str, consonant: Option<&str>, vowel: &str) -> MoraModel { + MoraModel::new( + text.into(), + consonant.map(|c| c.into()), + consonant.and(Some(0.0)), + vowel.into(), + 0.0, + 0.0, + ) } - #[allow(dead_code)] - pub fn set_context(&mut self, key: impl Into, value: impl Into) { - let key = key.into(); - let value = value.into(); - for breath_group in self.breath_groups.iter_mut() { - breath_group.set_context(&key, &value); - } + #[template] + #[rstest] + #[case( + "いぇ", + &[ + "xx^xx-sil+y=e/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:1_1%0_xx_xx/H:xx_xx/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_1/K:1+1-1", + "xx^sil-y+e=sil/A:0+1+1/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:1_1#0_xx@1_1|1_1/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-1@1+1&1-1|1+1/J:xx_xx/K:1+1-1", + "sil^y-e+sil=xx/A:0+1+1/B:xx-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:1_1#0_xx@1_1|1_1/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-1@1+1&1-1|1+1/J:xx_xx/K:1+1-1", + "y^e-sil+xx=xx/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:1_1!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_1/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+1-1", + ], + &[ + AccentPhraseModel::new( + vec![mora("イェ", Some("y"), "e")], + 1, + None, + false, + ) + ] + )] + #[case( + "んーっ", + &[ + "xx^xx-sil+N=N/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:3_3%0_xx_xx/H:xx_xx/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_3/K:1+1-3", + "xx^sil-N+N=cl/A:-2+1+3/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_1|1_3/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-3@1+1&1-1|1+3/J:xx_xx/K:1+1-3", + "sil^N-N+cl=sil/A:-1+2+2/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_1|1_3/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-3@1+1&1-1|1+3/J:xx_xx/K:1+1-3", + "N^N-cl+sil=xx/A:0+3+1/B:09-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_1|1_3/G:xx_xx%xx_xx_xx/H:xx_xx/I:1-3@1+1&1-1|1+3/J:xx_xx/K:1+1-3", + "N^cl-sil+xx=xx/A:xx+xx+xx/B:09-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:3_3!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_3/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+1-3", + ], + &[ + AccentPhraseModel::new( + vec![ + mora("ン", None, "N"), + mora("ン", None, "N"), + mora("ッ", None, "cl"), + ], + 3, + None, + false, + ), + ] + )] + #[case( + "これはテストです", + &[ + "xx^xx-sil+k=o/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:04+xx_xx/E:xx_xx!xx_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:3_3%0_xx_xx/H:xx_xx/I:xx-xx@xx+xx&xx-xx|xx+xx/J:2_8/K:1+2-8", + "xx^sil-k+o=r/A:-2+1+3/B:xx-xx_xx/C:04_xx+xx/D:24+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_2|1_8/G:5_1%0_xx_1/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "sil^k-o+r=e/A:-2+1+3/B:xx-xx_xx/C:04_xx+xx/D:24+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_2|1_8/G:5_1%0_xx_1/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "k^o-r+e=w/A:-1+2+2/B:xx-xx_xx/C:04_xx+xx/D:24+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_2|1_8/G:5_1%0_xx_1/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "o^r-e+w=a/A:-1+2+2/B:xx-xx_xx/C:04_xx+xx/D:24+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_2|1_8/G:5_1%0_xx_1/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "r^e-w+a=t/A:0+3+1/B:04-xx_xx/C:24_xx+xx/D:03+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_2|1_8/G:5_1%0_xx_1/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "e^w-a+t=e/A:0+3+1/B:04-xx_xx/C:24_xx+xx/D:03+xx_xx/E:xx_xx!xx_xx-xx/F:3_3#0_xx@1_2|1_8/G:5_1%0_xx_1/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "w^a-t+e=s/A:0+1+5/B:24-xx_xx/C:03_xx+xx/D:10+7_2/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "a^t-e+s=U/A:0+1+5/B:24-xx_xx/C:03_xx+xx/D:10+7_2/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "t^e-s+U=t/A:1+2+4/B:24-xx_xx/C:03_xx+xx/D:10+7_2/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "e^s-U+t=o/A:1+2+4/B:24-xx_xx/C:03_xx+xx/D:10+7_2/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "s^U-t+o=d/A:2+3+3/B:24-xx_xx/C:03_xx+xx/D:10+7_2/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "U^t-o+d=e/A:2+3+3/B:24-xx_xx/C:03_xx+xx/D:10+7_2/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "t^o-d+e=s/A:3+4+2/B:03-xx_xx/C:10_7+2/D:xx+xx_xx/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "o^d-e+s=U/A:3+4+2/B:03-xx_xx/C:10_7+2/D:xx+xx_xx/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "d^e-s+U=sil/A:4+5+1/B:03-xx_xx/C:10_7+2/D:xx+xx_xx/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "e^s-U+sil=xx/A:4+5+1/B:03-xx_xx/C:10_7+2/D:xx+xx_xx/E:3_3!0_xx-1/F:5_1#0_xx@2_1|4_5/G:xx_xx%xx_xx_xx/H:xx_xx/I:2-8@1+1&1-2|1+8/J:xx_xx/K:1+2-8", + "s^U-sil+xx=xx/A:xx+xx+xx/B:10-7_2/C:xx_xx+xx/D:xx+xx_xx/E:5_1!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:2_8/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:1+2-8", + ], + &[ + AccentPhraseModel::new( + vec![ + mora("コ", Some("k"), "o"), + mora("レ", Some("r"), "e"), + mora("ワ", Some("w"), "a"), + ], + 3, + None, + false, + ), + AccentPhraseModel::new( + vec![ + mora("テ", Some("t"), "e"), + mora("ス", Some("s"), "U"), + mora("ト", Some("t"), "o"), + mora("デ", Some("d"), "e"), + mora("ス", Some("s"), "U"), + ], + 1, + None, + false, + ), + ] + )] + #[case( + "1、1000、100万、1億?", + &[ + "xx^xx-sil+i=ch/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:05+xx_xx/E:xx_xx!xx_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:2_2%0_xx_xx/H:xx_xx/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_2/K:4+4-12", + "xx^sil-i+ch=i/A:-1+1+2/B:xx-xx_xx/C:05_xx+xx/D:05+xx_xx/E:xx_xx!xx_xx-xx/F:2_2#0_xx@1_1|1_2/G:2_1%0_xx_0/H:xx_xx/I:1-2@1+4&1-4|1+12/J:1_2/K:4+4-12", + "sil^i-ch+i=pau/A:0+2+1/B:xx-xx_xx/C:05_xx+xx/D:05+xx_xx/E:xx_xx!xx_xx-xx/F:2_2#0_xx@1_1|1_2/G:2_1%0_xx_0/H:xx_xx/I:1-2@1+4&1-4|1+12/J:1_2/K:4+4-12", + "i^ch-i+pau=s/A:0+2+1/B:xx-xx_xx/C:05_xx+xx/D:05+xx_xx/E:xx_xx!xx_xx-xx/F:2_2#0_xx@1_1|1_2/G:2_1%0_xx_0/H:xx_xx/I:1-2@1+4&1-4|1+12/J:1_2/K:4+4-12", + "ch^i-pau+s=e/A:xx+xx+xx/B:05-xx_xx/C:xx_xx+xx/D:05+xx_xx/E:2_2!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:2_1%0_xx_xx/H:1_2/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_2/K:4+4-12", + "i^pau-s+e=N/A:0+1+2/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_2!0_xx-0/F:2_1#0_xx@1_1|1_2/G:4_3%0_xx_0/H:1_2/I:1-2@2+3&2-3|3+10/J:1_4/K:4+4-12", + "pau^s-e+N=pau/A:0+1+2/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_2!0_xx-0/F:2_1#0_xx@1_1|1_2/G:4_3%0_xx_0/H:1_2/I:1-2@2+3&2-3|3+10/J:1_4/K:4+4-12", + "s^e-N+pau=hy/A:1+2+1/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_2!0_xx-0/F:2_1#0_xx@1_1|1_2/G:4_3%0_xx_0/H:1_2/I:1-2@2+3&2-3|3+10/J:1_4/K:4+4-12", + "e^N-pau+hy=a/A:xx+xx+xx/B:05-xx_xx/C:xx_xx+xx/D:05+xx_xx/E:2_1!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:4_3%0_xx_xx/H:1_2/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_4/K:4+4-12", + "N^pau-hy+a=k/A:-2+1+4/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_1!0_xx-0/F:4_3#0_xx@1_1|1_4/G:4_2%1_xx_0/H:1_2/I:1-4@3+2&3-2|5+8/J:1_4/K:4+4-12", + "pau^hy-a+k=u/A:-2+1+4/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_1!0_xx-0/F:4_3#0_xx@1_1|1_4/G:4_2%1_xx_0/H:1_2/I:1-4@3+2&3-2|5+8/J:1_4/K:4+4-12", + "hy^a-k+u=m/A:-1+2+3/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_1!0_xx-0/F:4_3#0_xx@1_1|1_4/G:4_2%1_xx_0/H:1_2/I:1-4@3+2&3-2|5+8/J:1_4/K:4+4-12", + "a^k-u+m=a/A:-1+2+3/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_1!0_xx-0/F:4_3#0_xx@1_1|1_4/G:4_2%1_xx_0/H:1_2/I:1-4@3+2&3-2|5+8/J:1_4/K:4+4-12", + "k^u-m+a=N/A:0+3+2/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_1!0_xx-0/F:4_3#0_xx@1_1|1_4/G:4_2%1_xx_0/H:1_2/I:1-4@3+2&3-2|5+8/J:1_4/K:4+4-12", + "u^m-a+N=pau/A:0+3+2/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_1!0_xx-0/F:4_3#0_xx@1_1|1_4/G:4_2%1_xx_0/H:1_2/I:1-4@3+2&3-2|5+8/J:1_4/K:4+4-12", + "m^a-N+pau=i/A:1+4+1/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:2_1!0_xx-0/F:4_3#0_xx@1_1|1_4/G:4_2%1_xx_0/H:1_2/I:1-4@3+2&3-2|5+8/J:1_4/K:4+4-12", + "a^N-pau+i=ch/A:xx+xx+xx/B:05-xx_xx/C:xx_xx+xx/D:05+xx_xx/E:4_3!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:4_2%1_xx_xx/H:1_4/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_4/K:4+4-12", + "N^pau-i+ch=i/A:-1+1+4/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:4_3!0_xx-0/F:4_2#1_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_4/I:1-4@4+1&4-1|9+4/J:xx_xx/K:4+4-12", + "pau^i-ch+i=o/A:0+2+3/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:4_3!0_xx-0/F:4_2#1_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_4/I:1-4@4+1&4-1|9+4/J:xx_xx/K:4+4-12", + "i^ch-i+o=k/A:0+2+3/B:05-xx_xx/C:05_xx+xx/D:05+xx_xx/E:4_3!0_xx-0/F:4_2#1_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_4/I:1-4@4+1&4-1|9+4/J:xx_xx/K:4+4-12", + "ch^i-o+k=u/A:1+3+2/B:05-xx_xx/C:05_xx+xx/D:xx+xx_xx/E:4_3!0_xx-0/F:4_2#1_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_4/I:1-4@4+1&4-1|9+4/J:xx_xx/K:4+4-12", + "i^o-k+u=sil/A:2+4+1/B:05-xx_xx/C:05_xx+xx/D:xx+xx_xx/E:4_3!0_xx-0/F:4_2#1_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_4/I:1-4@4+1&4-1|9+4/J:xx_xx/K:4+4-12", + "o^k-u+sil=xx/A:2+4+1/B:05-xx_xx/C:05_xx+xx/D:xx+xx_xx/E:4_3!0_xx-0/F:4_2#1_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_4/I:1-4@4+1&4-1|9+4/J:xx_xx/K:4+4-12", + "k^u-sil+xx=xx/A:xx+xx+xx/B:05-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:4_2!1_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_4/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:4+4-12", + ], + &[ + AccentPhraseModel::new( + vec![ + mora("イ", None, "i"), + mora("チ", Some("ch"), "i"), + ], + 2, + Some(mora("、", None, "pau")), + false, + ), + AccentPhraseModel::new( + vec![ + mora("セ", Some("s"), "e"), + mora("ン", None, "N"), + ], + 1, + Some(mora("、", None, "pau")), + false, + ), + AccentPhraseModel::new( + vec![ + mora("ヒャ", Some("hy"), "a"), + mora("ク", Some("k"), "u"), + mora("マ", Some("m"), "a"), + mora("ン", None, "N"), + ], + 3, + Some(mora("、", None, "pau")), + false, + ), + AccentPhraseModel::new( + vec![ + mora("イ", None, "i"), + mora("チ", Some("ch"), "i"), + mora("オ", None, "o"), + mora("ク", Some("k"), "u"), + ], + 2, + None, + true, + ), + ] + )] + #[case( + "クヮルテット。あーあ、。", + &[ + "xx^xx-sil+kw=a/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:02+xx_xx/E:xx_xx!xx_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:5_3%0_xx_xx/H:xx_xx/I:xx-xx@xx+xx&xx-xx|xx+xx/J:1_5/K:2+3-8", + "xx^sil-kw+a=r/A:-2+1+5/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "sil^kw-a+r=u/A:-2+1+5/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "kw^a-r+u=t/A:-1+2+4/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "a^r-u+t=e/A:-1+2+4/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "r^u-t+e=cl/A:0+3+3/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "u^t-e+cl=t/A:0+3+3/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "t^e-cl+t=o/A:1+4+2/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "e^cl-t+o=pau/A:2+5+1/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "cl^t-o+pau=a/A:2+5+1/B:xx-xx_xx/C:02_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx/F:5_3#0_xx@1_1|1_5/G:2_1%0_xx_0/H:xx_xx/I:1-5@1+2&1-3|1+8/J:2_3/K:2+3-8", + "t^o-pau+a=a/A:xx+xx+xx/B:02-xx_xx/C:xx_xx+xx/D:09+xx_xx/E:5_3!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:2_1%0_xx_xx/H:1_5/I:xx-xx@xx+xx&xx-xx|xx+xx/J:2_3/K:2+3-8", + "o^pau-a+a=a/A:0+1+2/B:02-xx_xx/C:09_xx+xx/D:09+xx_xx/E:5_3!0_xx-0/F:2_1#0_xx@1_2|1_3/G:1_1%0_xx_1/H:1_5/I:2-3@2+1&2-2|6+3/J:xx_xx/K:2+3-8", + "pau^a-a+a=sil/A:1+2+1/B:02-xx_xx/C:09_xx+xx/D:09+xx_xx/E:5_3!0_xx-0/F:2_1#0_xx@1_2|1_3/G:1_1%0_xx_1/H:1_5/I:2-3@2+1&2-2|6+3/J:xx_xx/K:2+3-8", + "a^a-a+sil=xx/A:0+1+1/B:09-xx_xx/C:09_xx+xx/D:xx+xx_xx/E:2_1!0_xx-1/F:1_1#0_xx@2_1|3_1/G:xx_xx%xx_xx_xx/H:1_5/I:2-3@2+1&2-2|6+3/J:xx_xx/K:2+3-8", + "a^a-sil+xx=xx/A:xx+xx+xx/B:09-xx_xx/C:xx_xx+xx/D:xx+xx_xx/E:1_1!0_xx-xx/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:2_3/I:xx-xx@xx+xx&xx-xx|xx+xx/J:xx_xx/K:2+3-8", + ], + &[ + AccentPhraseModel::new( + vec![ + mora("クヮ", Some("kw"), "a"), + mora("ル", Some("r"), "u"), + mora("テ", Some("t"), "e"), + mora("ッ", None, "cl"), + mora("ト", Some("t"), "o"), + ], + 3, + Some(mora("、", None, "pau")), + false, + ), + AccentPhraseModel::new( + vec![ + mora("ア", None, "a"), + mora("ア", None, "a"), + ], + 1, + None, + false, + ), + AccentPhraseModel::new( + vec![mora("ア", None, "a")], + 1, + None, + false, + ), + ] + )] + fn label_cases( + #[case] text: &str, + #[case] labels: &[&str], + #[case] accent_phrase: &[AccentPhraseModel], + ) { } - #[allow(dead_code)] - pub fn phonemes(&self) -> Vec { - // TODO:実装が中途半端なのであとでちゃんと実装する必要があるらしい - // https://github.com/VOICEVOX/voicevox_core/pull/174#discussion_r919982651 - let mut phonemes = Vec::with_capacity(self.breath_groups.len()); - - for i in 0..self.pauses().len() { - phonemes.push(self.pauses().get(i).unwrap().clone()); - if i < self.pauses().len() - 1 { - let p = self.breath_groups().get(i).unwrap().phonemes(); - phonemes.extend(p); - } - } - phonemes + #[apply(label_cases)] + #[tokio::test] + async fn open_jtalk(text: &str, labels: &[&str], _accent_phrase: &[AccentPhraseModel]) { + let open_jtalk = crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(); + assert_eq!(&open_jtalk.extract_fullcontext(text).unwrap(), labels); } - #[allow(dead_code)] - pub fn labels(&self) -> Vec { - self.phonemes().iter().map(|p| p.label().clone()).collect() + #[apply(label_cases)] + fn parse_labels(_text: &str, labels: &[&str], accent_phrase: &[AccentPhraseModel]) { + let parsed_labels = labels + .iter() + .map(|s| Label::from_str(s).unwrap()) + .collect::>(); + + assert_eq!( + &generate_accent_phrases(&parsed_labels).unwrap(), + accent_phrase + ); } - pub(crate) fn extract_full_context_label( - open_jtalk: &impl FullcontextExtractor, - text: impl AsRef, - ) -> Result { - let labels = open_jtalk - .extract_fullcontext(text.as_ref()) - .map_err(|source| FullContextLabelError { - context: ErrorKind::OpenJtalk, - source: Some(source), - })?; - - labels - .into_iter() - .map(Phoneme::from_label) - .collect::, _>>() - .and_then(Self::from_phonemes) - .map_err(|context| FullContextLabelError { - context, - source: None, - }) + #[apply(label_cases)] + #[tokio::test] + async fn extract_fullcontext( + text: &str, + _labels: &[&str], + accent_phrase: &[AccentPhraseModel], + ) { + let open_jtalk = crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(); + assert_eq!( + &extract_full_context_label(&open_jtalk, text).unwrap(), + accent_phrase + ); } } diff --git a/crates/voicevox_core/src/engine/kana_parser.rs b/crates/voicevox_core/src/engine/kana_parser.rs index fe10c5b3f..eaa3d93c1 100644 --- a/crates/voicevox_core/src/engine/kana_parser.rs +++ b/crates/voicevox_core/src/engine/kana_parser.rs @@ -163,7 +163,7 @@ pub(crate) fn parse_kana(text: &str) -> KanaParseResult> Ok(parsed_result) } -pub fn create_kana(accent_phrases: &[AccentPhraseModel]) -> String { +pub(crate) fn create_kana(accent_phrases: &[AccentPhraseModel]) -> String { let mut text = String::new(); for phrase in accent_phrases { let moras = phrase.moras(); diff --git a/crates/voicevox_core/src/engine/mod.rs b/crates/voicevox_core/src/engine/mod.rs index 1c7422e76..95fe3d562 100644 --- a/crates/voicevox_core/src/engine/mod.rs +++ b/crates/voicevox_core/src/engine/mod.rs @@ -6,7 +6,9 @@ mod mora_list; pub(crate) mod open_jtalk; pub(crate) use self::acoustic_feature_extractor::OjtPhoneme; -pub(crate) use self::full_context_label::{FullContextLabelError, Utterance}; +pub(crate) use self::full_context_label::{ + extract_full_context_label, mora_to_text, FullContextLabelError, +}; pub(crate) use self::kana_parser::{create_kana, parse_kana, KanaParseError}; pub use self::model::{AccentPhraseModel, AudioQueryModel, MoraModel}; pub(crate) use self::mora_list::mora2text; diff --git a/crates/voicevox_core/src/engine/model.rs b/crates/voicevox_core/src/engine/model.rs index 77adbebe7..de0f388f9 100644 --- a/crates/voicevox_core/src/engine/model.rs +++ b/crates/voicevox_core/src/engine/model.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; /* 各フィールドのjsonフィールド名はsnake_caseとする*/ /// モーラ(子音+母音)ごとの情報。 -#[derive(Clone, Debug, new, Getters, Deserialize, Serialize)] +#[derive(Clone, Debug, new, Getters, Deserialize, Serialize, PartialEq)] pub struct MoraModel { /// 文字。 text: String, @@ -22,7 +22,7 @@ pub struct MoraModel { } /// AccentPhrase (アクセント句ごとの情報)。 -#[derive(Clone, Debug, new, Getters, Deserialize, Serialize)] +#[derive(Clone, Debug, new, Getters, Deserialize, Serialize, PartialEq)] pub struct AccentPhraseModel { /// モーラの配列。 moras: Vec, diff --git a/crates/voicevox_core/src/engine/mora_list.rs b/crates/voicevox_core/src/engine/mora_list.rs index 9c2c8d188..6e4b8ba2b 100644 --- a/crates/voicevox_core/src/engine/mora_list.rs +++ b/crates/voicevox_core/src/engine/mora_list.rs @@ -186,7 +186,7 @@ pub(super) const MORA_LIST_MINIMUM: &[[&str; 3]] = &[ ["ア", "", "a"], ]; -pub fn mora2text(mora: &str) -> &str { +pub(crate) fn mora2text(mora: &str) -> &str { for &[text, consonant, vowel] in MORA_LIST_MINIMUM { if mora.len() >= consonant.len() && &mora[..consonant.len()] == consonant diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs index 07c189e47..65e9344af 100644 --- a/crates/voicevox_core/src/infer/status.rs +++ b/crates/voicevox_core/src/infer/status.rs @@ -31,14 +31,14 @@ pub(crate) struct Status { } impl Status { - pub fn new(session_options: EnumMap) -> Self { + pub(crate) fn new(session_options: EnumMap) -> Self { Self { loaded_models: Default::default(), session_options, } } - pub fn insert_model( + pub(crate) fn insert_model( &self, model_header: &VoiceModelHeader, model_bytes: &EnumMap>, @@ -64,11 +64,11 @@ impl Status { Ok(()) } - pub fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { + pub(crate) fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { self.loaded_models.lock().unwrap().remove(voice_model_id) } - pub fn metas(&self) -> VoiceModelMeta { + pub(crate) fn metas(&self) -> VoiceModelMeta { self.loaded_models.lock().unwrap().metas() } @@ -76,18 +76,18 @@ impl Status { self.loaded_models.lock().unwrap().ids_for(style_id) } - pub fn is_loaded_model(&self, voice_model_id: &VoiceModelId) -> bool { + pub(crate) fn is_loaded_model(&self, voice_model_id: &VoiceModelId) -> bool { self.loaded_models .lock() .unwrap() .contains_voice_model(voice_model_id) } - pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { + pub(crate) fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { self.loaded_models.lock().unwrap().contains_style(style_id) } - pub fn validate_speaker_id(&self, style_id: StyleId) -> bool { + pub(crate) fn validate_speaker_id(&self, style_id: StyleId) -> bool { self.is_loaded_model_by_style_id(style_id) } diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs index ea74c9f7c..7cb4abaf1 100644 --- a/crates/voicevox_core/src/lib.rs +++ b/crates/voicevox_core/src/lib.rs @@ -12,6 +12,7 @@ mod numerics; mod result; mod synthesizer; mod task; +mod text_analyzer; mod user_dict; mod version; mod voice_model; @@ -23,6 +24,11 @@ pub mod tokio; #[cfg(test)] mod test_util; +// https://crates.io/crates/rstest_reuse#use-rstest_resuse-at-the-top-of-your-crate +#[allow(clippy::single_component_path_imports)] +#[cfg(test)] +use rstest_reuse; + pub use self::{ devices::SupportedDevices, engine::{AccentPhraseModel, AudioQueryModel, FullcontextExtractor}, diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index 202e917c7..a3c34489e 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -80,7 +80,7 @@ pub(crate) mod blocking { use enum_map::enum_map; use crate::{ - engine::{self, create_kana, parse_kana, MoraModel, OjtPhoneme, Utterance}, + engine::{create_kana, mora_to_text, MoraModel, OjtPhoneme}, error::ErrorRepr, infer::{ domain::{ @@ -92,6 +92,7 @@ pub(crate) mod blocking { InferenceSessionOptions, }, numerics::F32Ext as _, + text_analyzer::{KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer}, AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId, SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta, }; @@ -103,7 +104,8 @@ pub(crate) mod blocking { /// 音声シンセサイザ。 pub struct Synthesizer { pub(super) status: Status, - open_jtalk: O, + open_jtalk_analyzer: OpenJTalkAnalyzer, + kana_analyzer: KanaAnalyzer, use_gpu: bool, } @@ -176,7 +178,8 @@ pub(crate) mod blocking { return Ok(Self { status, - open_jtalk, + open_jtalk_analyzer: OpenJTalkAnalyzer::new(open_jtalk), + kana_analyzer: KanaAnalyzer, use_gpu, }); @@ -373,7 +376,7 @@ pub(crate) mod blocking { let pitch = (*last_mora.pitch() + ADJUST_PITCH).min(MAX_PITCH); MoraModel::new( - mora_to_text(last_mora.vowel()), + mora_to_text(None, last_mora.vowel()), None, None, last_mora.vowel().clone(), @@ -457,7 +460,8 @@ pub(crate) mod blocking { kana: &str, style_id: StyleId, ) -> Result> { - self.replace_mora_data(&parse_kana(kana)?, style_id) + let accent_phrases = self.kana_analyzer.analyze(kana)?; + self.replace_mora_data(&accent_phrases, style_id) } /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 @@ -743,75 +747,7 @@ pub(crate) mod blocking { text: &str, style_id: StyleId, ) -> Result> { - if text.is_empty() { - return Ok(Vec::new()); - } - - let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?; - - let accent_phrases: Vec = utterance - .breath_groups() - .iter() - .enumerate() - .fold(Vec::new(), |mut accum_vec, (i, breath_group)| { - accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map( - |(j, accent_phrase)| { - let moras = accent_phrase - .moras() - .iter() - .map(|mora| { - let mora_text = mora - .phonemes() - .iter() - .map(|phoneme| phoneme.phoneme().to_string()) - .collect::>() - .join(""); - - let (consonant, consonant_length) = - if let Some(consonant) = mora.consonant() { - (Some(consonant.phoneme().to_string()), Some(0.)) - } else { - (None, None) - }; - - MoraModel::new( - mora_to_text(mora_text), - consonant, - consonant_length, - mora.vowel().phoneme().into(), - 0., - 0., - ) - }) - .collect(); - - let pause_mora = if i != utterance.breath_groups().len() - 1 - && j == breath_group.accent_phrases().len() - 1 - { - Some(MoraModel::new( - "、".into(), - None, - None, - "pau".into(), - 0., - 0., - )) - } else { - None - }; - - AccentPhraseModel::new( - moras, - *accent_phrase.accent(), - pause_mora, - *accent_phrase.is_interrogative(), - ) - }, - )); - - accum_vec - }); - + let accent_phrases = self.open_jtalk_analyzer.analyze(text)?; self.replace_mora_data(&accent_phrases, style_id) } @@ -1175,21 +1111,6 @@ pub(crate) mod blocking { (consonant_phoneme_list, vowel_phoneme_list, vowel_indexes) } - fn mora_to_text(mora: impl AsRef) -> String { - let last_char = mora.as_ref().chars().last().unwrap(); - let mora = if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) { - format!( - "{}{}", - &mora.as_ref()[0..mora.as_ref().len() - 1], - last_char.to_lowercase() - ) - } else { - mora.as_ref().to_string() - }; - // もしカタカナに変換できなければ、引数で与えた文字列がそのまま返ってくる - engine::mora2text(&mora).to_string() - } - impl AudioQueryModel { fn from_accent_phrases(accent_phrases: Vec) -> Self { let kana = create_kana(&accent_phrases); diff --git a/crates/voicevox_core/src/test_util.rs b/crates/voicevox_core/src/test_util.rs index 926fe45bb..d60785246 100644 --- a/crates/voicevox_core/src/test_util.rs +++ b/crates/voicevox_core/src/test_util.rs @@ -2,7 +2,7 @@ use std::path::PathBuf; use crate::Result; -pub async fn open_default_vvm_file() -> crate::tokio::VoiceModel { +pub(crate) async fn open_default_vvm_file() -> crate::tokio::VoiceModel { crate::tokio::VoiceModel::from_path( ::test_util::convert_zip_vvm( PathBuf::from(env!("CARGO_WORKSPACE_DIR")) diff --git a/crates/voicevox_core/src/text_analyzer.rs b/crates/voicevox_core/src/text_analyzer.rs new file mode 100644 index 000000000..8540f26e0 --- /dev/null +++ b/crates/voicevox_core/src/text_analyzer.rs @@ -0,0 +1,40 @@ +use crate::{ + engine::{extract_full_context_label, parse_kana}, + AccentPhraseModel, FullcontextExtractor, Result, +}; + +pub(crate) trait TextAnalyzer { + fn analyze(&self, text: &str) -> Result>; +} + +/// AquesTalk風記法からAccentPhraseの配列を生成するTextAnalyzer +#[derive(Clone)] +pub(crate) struct KanaAnalyzer; + +impl TextAnalyzer for KanaAnalyzer { + fn analyze(&self, text: &str) -> Result> { + if text.is_empty() { + return Ok(Vec::new()); + } + Ok(parse_kana(text)?) + } +} + +/// OpenJtalkからAccentPhraseの配列を生成するTextAnalyzer +#[derive(Clone)] +pub(crate) struct OpenJTalkAnalyzer(O); + +impl OpenJTalkAnalyzer { + pub(crate) fn new(open_jtalk: O) -> Self { + Self(open_jtalk) + } +} + +impl TextAnalyzer for OpenJTalkAnalyzer { + fn analyze(&self, text: &str) -> Result> { + if text.is_empty() { + return Ok(Vec::new()); + } + Ok(extract_full_context_label(&self.0, text)?) + } +} diff --git a/crates/voicevox_core/src/user_dict/part_of_speech_data.rs b/crates/voicevox_core/src/user_dict/part_of_speech_data.rs index 76e36e389..b7bc95440 100644 --- a/crates/voicevox_core/src/user_dict/part_of_speech_data.rs +++ b/crates/voicevox_core/src/user_dict/part_of_speech_data.rs @@ -1,37 +1,36 @@ -use derive_getters::Getters; use once_cell::sync::Lazy; use std::collections::HashMap; use crate::UserDictWordType; /// 最小の優先度。 -pub static MIN_PRIORITY: u32 = 0; +pub(super) static MIN_PRIORITY: u32 = 0; /// 最大の優先度。 -pub static MAX_PRIORITY: u32 = 10; +pub(super) static MAX_PRIORITY: u32 = 10; /// 品詞ごとの情報。 -#[derive(Debug, Getters)] -pub struct PartOfSpeechDetail { +#[derive(Debug)] +pub(super) struct PartOfSpeechDetail { /// 品詞。 - pub part_of_speech: &'static str, + pub(super) part_of_speech: &'static str, /// 品詞細分類1。 - pub part_of_speech_detail_1: &'static str, + pub(super) part_of_speech_detail_1: &'static str, /// 品詞細分類2。 - pub part_of_speech_detail_2: &'static str, + pub(super) part_of_speech_detail_2: &'static str, /// 品詞細分類3。 - pub part_of_speech_detail_3: &'static str, + pub(super) part_of_speech_detail_3: &'static str, /// 文脈IDは辞書の左・右文脈IDのこと。 /// /// 参考: - pub context_id: i32, + pub(super) context_id: i32, /// コストのパーセンタイル。 - pub cost_candidates: Vec, + cost_candidates: Vec, /// アクセント結合規則の一覧。 - pub accent_associative_rules: Vec<&'static str>, + _accent_associative_rules: Vec<&'static str>, // unused for now } // 元データ: https://github.com/VOICEVOX/voicevox_engine/blob/master/voicevox_engine/part_of_speech_data.py -pub static PART_OF_SPEECH_DETAIL: Lazy> = +pub(super) static PART_OF_SPEECH_DETAIL: Lazy> = Lazy::new(|| { HashMap::from_iter([ ( @@ -45,7 +44,7 @@ pub static PART_OF_SPEECH_DETAIL: Lazy &'static [i32] { .cost_candidates } -pub fn priority2cost(context_id: i32, priority: u32) -> i32 { +pub(super) fn priority2cost(context_id: i32, priority: u32) -> i32 { let cost_candidates = search_cost_candidates(context_id); cost_candidates[(MAX_PRIORITY - priority) as usize] } diff --git a/crates/voicevox_core/src/user_dict/word.rs b/crates/voicevox_core/src/user_dict/word.rs index f2abc905d..7ed98a949 100644 --- a/crates/voicevox_core/src/user_dict/word.rs +++ b/crates/voicevox_core/src/user_dict/word.rs @@ -219,7 +219,7 @@ pub enum UserDictWordType { } impl UserDictWord { - pub fn to_mecab_format(&self) -> String { + pub(super) fn to_mecab_format(&self) -> String { let pos = PART_OF_SPEECH_DETAIL.get(&self.word_type).unwrap(); format!( "{},{},{},{},{},{},{},{},{},{},{},{},{},{}/{},{}", diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index fc8f4d20f..76f692eaa 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -1,3 +1,7 @@ +//! 音声モデル( VVM ファイル)。 +//! +//! VVM ファイルの定義と形式は[ドキュメント](../../../docs/vvm.md)を参照。 + use derive_getters::Getters; use derive_new::new; use serde::Deserialize; @@ -287,42 +291,43 @@ pub(crate) mod tokio { } #[derive(new)] - struct AsyncVvmEntryReader { - reader: async_zip::read::fs::ZipFileReader, + struct AsyncVvmEntryReader<'a> { + path: &'a Path, + reader: async_zip::base::read::mem::ZipFileReader, entry_map: HashMap, } - impl AsyncVvmEntryReader { - async fn open(path: &Path) -> LoadModelResult { - let reader = async_zip::read::fs::ZipFileReader::new(path) - .await - .map_err(|source| LoadModelError { - path: path.to_owned(), - context: LoadModelErrorKind::OpenZipFile, - source: Some(source.into()), - })?; + impl<'a> AsyncVvmEntryReader<'a> { + async fn open(path: &'a Path) -> LoadModelResult { + let reader = async { + let file = fs_err::tokio::read(path).await?; + async_zip::base::read::mem::ZipFileReader::new(file).await + } + .await + .map_err(|source| LoadModelError { + path: path.to_owned(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source.into()), + })?; let entry_map: HashMap<_, _> = reader .file() .entries() .iter() - .filter(|e| !e.entry().dir()) - .enumerate() - .map(|(i, e)| { - ( - e.entry().filename().to_string(), - AsyncVvmEntry { - index: i, - entry: e.entry().clone(), - }, - ) + .flat_map(|e| { + // 非UTF-8のファイルを利用することはないため、無視する + let filename = e.filename().as_str().ok()?; + (!e.dir().ok()?).then_some(())?; + Some((filename.to_owned(), (**e).clone())) }) + .enumerate() + .map(|(i, (filename, entry))| (filename, AsyncVvmEntry { index: i, entry })) .collect(); - Ok(AsyncVvmEntryReader::new(reader, entry_map)) + Ok(AsyncVvmEntryReader::new(path, reader, entry_map)) } async fn read_vvm_json(&self, filename: &str) -> LoadModelResult { let bytes = self.read_vvm_entry(filename).await?; serde_json::from_slice(&bytes).map_err(|source| LoadModelError { - path: self.reader.path().to_owned(), + path: self.path.to_owned(), context: LoadModelErrorKind::ReadZipEntry { filename: filename.to_owned(), }, @@ -336,16 +341,14 @@ pub(crate) mod tokio { .entry_map .get(filename) .ok_or_else(|| io::Error::from(io::ErrorKind::NotFound))?; - let mut manifest_reader = self.reader.entry(me.index).await?; + let mut manifest_reader = self.reader.reader_with_entry(me.index).await?; let mut buf = Vec::with_capacity(me.entry.uncompressed_size() as usize); - manifest_reader - .read_to_end_checked(&mut buf, &me.entry) - .await?; + manifest_reader.read_to_end_checked(&mut buf).await?; Ok::<_, anyhow::Error>(buf) } .await .map_err(|source| LoadModelError { - path: self.reader.path().to_owned(), + path: self.path.to_owned(), context: LoadModelErrorKind::ReadZipEntry { filename: filename.to_owned(), }, diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index d69641c34..74177db9c 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -63,7 +63,7 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes pub(crate) type CApiResult = std::result::Result; #[derive(Error, Debug)] -pub enum CApiError { +pub(crate) enum CApiError { #[error("{0}")] RustApi(#[from] voicevox_core::Error), #[error("UTF-8として不正な入力です")] diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 61127ff81..fbb0bf6bf 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -447,8 +447,10 @@ pub unsafe extern "C" fn voicevox_synthesizer_is_loaded_voice_model( model_id: VoicevoxVoiceModelId, ) -> bool { init_logger_once(); - // FIXME: 不正なUTF-8文字列に対し、正式なエラーとするか黙って`false`を返す - let raw_model_id = ensure_utf8(unsafe { CStr::from_ptr(model_id) }).unwrap(); + let Ok(raw_model_id) = ensure_utf8(unsafe { CStr::from_ptr(model_id) }) else { + // 与えられたIDがUTF-8ではない場合、それに対応する`VoicdModel`は確実に存在しない + return false; + }; synthesizer .synthesizer() .is_loaded_voice_model(&VoiceModelId::new(raw_model_id.into())) diff --git a/crates/voicevox_core_c_api/xcframework/Frameworks/aarch64/voicevox_core.framework/Info.plist b/crates/voicevox_core_c_api/xcframework/Frameworks/aarch64/voicevox_core.framework/Info.plist new file mode 100644 index 000000000..f3f359d09 --- /dev/null +++ b/crates/voicevox_core_c_api/xcframework/Frameworks/aarch64/voicevox_core.framework/Info.plist @@ -0,0 +1,55 @@ + + + + + BuildMachineOSBuild + 23B81 + CFBundleDevelopmentRegion + en + CFBundleExecutable + voicevox_core + CFBundleIdentifier + jp.hiroshiba.voicevox.voicevox-core + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + voicevox_core + CFBundlePackageType + FMWK + CFBundleShortVersionString + 1.0 + CFBundleSupportedPlatforms + + iPhoneOS + + CFBundleVersion + 1 + DTCompiler + com.apple.compilers.llvm.clang.1_0 + DTPlatformBuild + 21C52 + DTPlatformName + iphoneos + DTPlatformVersion + 17.2 + DTSDKBuild + 21C52 + DTSDKName + iphoneos17.2 + DTXcode + 1510 + DTXcodeBuild + 15C65 + MinimumOSVersion + 16.2 + UIDeviceFamily + + 1 + 2 + + UIRequiredDeviceCapabilities + + arm64 + + + diff --git a/crates/voicevox_core_c_api/xcframework/Frameworks/aarch64/voicevox_core.framework/Modules/module.modulemap b/crates/voicevox_core_c_api/xcframework/Frameworks/aarch64/voicevox_core.framework/Modules/module.modulemap new file mode 100644 index 000000000..a4812ac0c --- /dev/null +++ b/crates/voicevox_core_c_api/xcframework/Frameworks/aarch64/voicevox_core.framework/Modules/module.modulemap @@ -0,0 +1,6 @@ +framework module voicevox_core { + umbrella header "voicevox_core.h" + export * + + module * { export * } +} diff --git a/crates/voicevox_core_c_api/xcframework/Frameworks/sim/voicevox_core.framework/Info.plist b/crates/voicevox_core_c_api/xcframework/Frameworks/sim/voicevox_core.framework/Info.plist new file mode 100644 index 000000000..dea41a54d --- /dev/null +++ b/crates/voicevox_core_c_api/xcframework/Frameworks/sim/voicevox_core.framework/Info.plist @@ -0,0 +1,51 @@ + + + + + BuildMachineOSBuild + 23B81 + CFBundleDevelopmentRegion + en + CFBundleExecutable + voicevox_core + CFBundleIdentifier + jp.hiroshiba.voicevox.voicevox-core + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + voicevox_core + CFBundlePackageType + FMWK + CFBundleShortVersionString + 1.0 + CFBundleSupportedPlatforms + + iPhoneSimulator + + CFBundleVersion + 1 + DTCompiler + com.apple.compilers.llvm.clang.1_0 + DTPlatformBuild + 21C52 + DTPlatformName + iphonesimulator + DTPlatformVersion + 17.2 + DTSDKBuild + 21C52 + DTSDKName + iphonesimulator17.2 + DTXcode + 1510 + DTXcodeBuild + 15C65 + MinimumOSVersion + 16.2 + UIDeviceFamily + + 1 + 2 + + + diff --git a/crates/voicevox_core_c_api/xcframework/Frameworks/sim/voicevox_core.framework/Modules/module.modulemap b/crates/voicevox_core_c_api/xcframework/Frameworks/sim/voicevox_core.framework/Modules/module.modulemap new file mode 100644 index 000000000..a4812ac0c --- /dev/null +++ b/crates/voicevox_core_c_api/xcframework/Frameworks/sim/voicevox_core.framework/Modules/module.modulemap @@ -0,0 +1,6 @@ +framework module voicevox_core { + umbrella header "voicevox_core.h" + export * + + module * { export * } +} diff --git a/crates/voicevox_core_c_api/xcframework/Headers/README.md b/crates/voicevox_core_c_api/xcframework/Headers/README.md deleted file mode 100644 index cd3b3f6a0..000000000 --- a/crates/voicevox_core_c_api/xcframework/Headers/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# xcframeworkのHeadersに追加されるファイル -## module.modulemap - -C言語やObjective-Cのモジュールが、ライブラリ内の公開インターフェースを表現するために使用されます。 -これにより、外部のコードがこのライブラリを利用する際に、必要なインクルードや参照を容易にすることができます。 diff --git a/crates/voicevox_core_c_api/xcframework/Headers/module.modulemap b/crates/voicevox_core_c_api/xcframework/Headers/module.modulemap deleted file mode 100644 index 1b1680b2d..000000000 --- a/crates/voicevox_core_c_api/xcframework/Headers/module.modulemap +++ /dev/null @@ -1,4 +0,0 @@ -module VoicevoxCore { - header "voicevox_core.h" - export * -} diff --git a/crates/voicevox_core_c_api/xcframework/README.md b/crates/voicevox_core_c_api/xcframework/README.md new file mode 100644 index 000000000..3dda8f3a7 --- /dev/null +++ b/crates/voicevox_core_c_api/xcframework/README.md @@ -0,0 +1,6 @@ +# xcframeworkフォルダの内容について +## Frameworks + +iOS向けの配布ライブラリXCFramework内のFrameworkを作るための雛形です。 +雛形は端末用とシミュレータ用の2種類です。 + diff --git a/crates/voicevox_core_java_api/src/common.rs b/crates/voicevox_core_java_api/src/common.rs index 5d1278f0c..71a60f5f3 100644 --- a/crates/voicevox_core_java_api/src/common.rs +++ b/crates/voicevox_core_java_api/src/common.rs @@ -1,72 +1,7 @@ use std::{error::Error as _, iter}; use derive_more::From; -use jni::{ - objects::{JObject, JThrowable}, - JNIEnv, -}; - -// FIXME: 別ファイルに分離する -#[no_mangle] -extern "system" fn Java_jp_hiroshiba_voicevoxcore_Dll_00024LoggerInitializer_initLogger( - _: JNIEnv<'_>, - _: JObject<'_>, -) { - if cfg!(target_os = "android") { - android_logger::init_once( - android_logger::Config::default() - .with_tag("VoicevoxCore") - .with_filter( - android_logger::FilterBuilder::new() - // FIXME: ortも`warn`は出すべき - .parse("error,voicevox_core=info,voicevox_core_java_api=info,ort=error") - .build(), - ), - ); - } else { - // TODO: Android以外でのログ出力を良い感じにする。(System.Loggerを使う?) - use chrono::SecondsFormat; - use std::{ - env, fmt, - io::{self, IsTerminal, Write}, - }; - use tracing_subscriber::{fmt::format::Writer, EnvFilter}; - - // FIXME: `try_init` → `init` (subscriberは他に存在しないはずなので) - let _ = tracing_subscriber::fmt() - .with_env_filter(if env::var_os(EnvFilter::DEFAULT_ENV).is_some() { - EnvFilter::from_default_env() - } else { - // FIXME: `c_api`じゃないし、ortも`warn`は出すべき - "error,voicevox_core=info,voicevox_core_c_api=info,ort=error".into() - }) - .with_timer(local_time as fn(&mut Writer<'_>) -> _) - .with_ansi(out().is_terminal() && env_allows_ansi()) - .with_writer(out) - .try_init(); - - fn local_time(wtr: &mut Writer<'_>) -> fmt::Result { - // ローカル時刻で表示はするが、そのフォーマットはtracing-subscriber本来のものに近いようにする。 - // https://github.com/tokio-rs/tracing/blob/tracing-subscriber-0.3.16/tracing-subscriber/src/fmt/time/datetime.rs#L235-L241 - wtr.write_str(&chrono::Local::now().to_rfc3339_opts(SecondsFormat::Micros, false)) - } - - fn out() -> impl IsTerminal + Write { - io::stderr() - } - - fn env_allows_ansi() -> bool { - // https://docs.rs/termcolor/1.2.0/src/termcolor/lib.rs.html#245-291 - // ただしWindowsではPowerShellっぽかったらそのまま許可する。 - // ちゃんとやるなら`ENABLE_VIRTUAL_TERMINAL_PROCESSING`をチェックするなり、そもそも - // fwdansiとかでWin32の色に変換するべきだが、面倒。 - env::var_os("TERM").map_or( - cfg!(windows) && env::var_os("PSModulePath").is_some(), - |term| term != "dumb", - ) && env::var_os("NO_COLOR").is_none() - } - } -} +use jni::{objects::JThrowable, JNIEnv}; #[macro_export] macro_rules! object { @@ -94,7 +29,7 @@ macro_rules! enum_object { }; } -pub fn throw_if_err(mut env: JNIEnv<'_>, fallback: T, inner: F) -> T +pub(crate) fn throw_if_err(mut env: JNIEnv<'_>, fallback: T, inner: F) -> T where F: FnOnce(&mut JNIEnv<'_>) -> Result, { @@ -220,7 +155,7 @@ where } #[derive(From, Debug)] -pub enum JavaApiError { +pub(crate) enum JavaApiError { #[from] RustApi(voicevox_core::Error), diff --git a/crates/voicevox_core_java_api/src/lib.rs b/crates/voicevox_core_java_api/src/lib.rs index 4fdea9cab..9615f0a94 100644 --- a/crates/voicevox_core_java_api/src/lib.rs +++ b/crates/voicevox_core_java_api/src/lib.rs @@ -1,5 +1,6 @@ mod common; mod info; +mod logger; mod open_jtalk; mod synthesizer; mod user_dict; diff --git a/crates/voicevox_core_java_api/src/logger.rs b/crates/voicevox_core_java_api/src/logger.rs new file mode 100644 index 000000000..30545725e --- /dev/null +++ b/crates/voicevox_core_java_api/src/logger.rs @@ -0,0 +1,62 @@ +use jni::{objects::JObject, JNIEnv}; + +#[no_mangle] +extern "system" fn Java_jp_hiroshiba_voicevoxcore_Dll_00024LoggerInitializer_initLogger( + _: JNIEnv<'_>, + _: JObject<'_>, +) { + if cfg!(target_os = "android") { + android_logger::init_once( + android_logger::Config::default() + .with_tag("VoicevoxCore") + .with_filter( + android_logger::FilterBuilder::new() + // FIXME: ortも`warn`は出すべき + .parse("error,voicevox_core=info,voicevox_core_java_api=info,ort=error") + .build(), + ), + ); + } else { + // TODO: Android以外でのログ出力を良い感じにする。(System.Loggerを使う?) + use chrono::SecondsFormat; + use std::{ + env, fmt, + io::{self, IsTerminal, Write}, + }; + use tracing_subscriber::{fmt::format::Writer, EnvFilter}; + + // FIXME: `try_init` → `init` (subscriberは他に存在しないはずなので) + let _ = tracing_subscriber::fmt() + .with_env_filter(if env::var_os(EnvFilter::DEFAULT_ENV).is_some() { + EnvFilter::from_default_env() + } else { + // FIXME: `c_api`じゃないし、ortも`warn`は出すべき + "error,voicevox_core=info,voicevox_core_c_api=info,ort=error".into() + }) + .with_timer(local_time as fn(&mut Writer<'_>) -> _) + .with_ansi(out().is_terminal() && env_allows_ansi()) + .with_writer(out) + .try_init(); + + fn local_time(wtr: &mut Writer<'_>) -> fmt::Result { + // ローカル時刻で表示はするが、そのフォーマットはtracing-subscriber本来のものに近いようにする。 + // https://github.com/tokio-rs/tracing/blob/tracing-subscriber-0.3.16/tracing-subscriber/src/fmt/time/datetime.rs#L235-L241 + wtr.write_str(&chrono::Local::now().to_rfc3339_opts(SecondsFormat::Micros, false)) + } + + fn out() -> impl IsTerminal + Write { + io::stderr() + } + + fn env_allows_ansi() -> bool { + // https://docs.rs/termcolor/1.2.0/src/termcolor/lib.rs.html#245-291 + // ただしWindowsではPowerShellっぽかったらそのまま許可する。 + // ちゃんとやるなら`ENABLE_VIRTUAL_TERMINAL_PROCESSING`をチェックするなり、そもそも + // fwdansiとかでWin32の色に変換するべきだが、面倒。 + env::var_os("TERM").map_or( + cfg!(windows) && env::var_os("PSModulePath").is_some(), + |term| term != "dumb", + ) && env::var_os("NO_COLOR").is_none() + } + } +} diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi index 7a6596008..d8e9f6fe2 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/asyncio.pyi @@ -1,4 +1,4 @@ -from pathlib import Path +from os import PathLike from typing import TYPE_CHECKING, Dict, List, Literal, Union from uuid import UUID @@ -18,7 +18,7 @@ class VoiceModel: 音声モデル。""" @staticmethod - async def from_path(path: Union[Path, str]) -> VoiceModel: + async def from_path(path: Union[str, PathLike[str]]) -> VoiceModel: """ VVMファイルから ``VoiceModel`` を生成する。 @@ -43,7 +43,7 @@ class OpenJtalk: """ @staticmethod - async def new(open_jtalk_dict_dir: Union[Path, str]) -> "OpenJtalk": + async def new(open_jtalk_dict_dir: Union[str, PathLike[str]]) -> "OpenJtalk": """ ``OpenJTalk`` を生成する。 diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi index 3a208fb33..5584d68bb 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi +++ b/crates/voicevox_core_python_api/python/voicevox_core/_rust/blocking.pyi @@ -1,4 +1,4 @@ -from pathlib import Path +from os import PathLike from typing import TYPE_CHECKING, Dict, List, Literal, Union from uuid import UUID @@ -18,7 +18,7 @@ class VoiceModel: 音声モデル。""" @staticmethod - def from_path(path: Union[Path, str]) -> VoiceModel: + def from_path(path: Union[str, PathLike[str]]) -> VoiceModel: """ VVMファイルから ``VoiceModel`` を生成する。 @@ -47,7 +47,7 @@ class OpenJtalk: Open JTalkの辞書ディレクトリ。 """ - def __init__(self, open_jtalk_dict_dir: Union[Path, str]) -> None: ... + def __init__(self, open_jtalk_dict_dir: Union[str, PathLike[str]]) -> None: ... def use_user_dict(self, user_dict: UserDict) -> None: """ ユーザー辞書を設定する。 diff --git a/crates/voicevox_core_python_api/src/convert.rs b/crates/voicevox_core_python_api/src/convert.rs index 6544ce26c..3f629f90e 100644 --- a/crates/voicevox_core_python_api/src/convert.rs +++ b/crates/voicevox_core_python_api/src/convert.rs @@ -22,7 +22,7 @@ use crate::{ UseUserDictError, WordNotFoundError, }; -pub fn from_acceleration_mode(ob: &PyAny) -> PyResult { +pub(crate) fn from_acceleration_mode(ob: &PyAny) -> PyResult { let py = ob.py(); let class = py.import("voicevox_core")?.getattr("AccelerationMode")?; @@ -39,8 +39,8 @@ pub fn from_acceleration_mode(ob: &PyAny) -> PyResult { } } -// FIXME: `VoiceModel`や`UserDict`についてはこれではなく、`PathBuf::extract`を直接使うようにする -pub fn from_utf8_path(ob: &PyAny) -> PyResult { +// FIXME: `UserDict`についてはこれではなく、`PathBuf::extract`を直接使うようにする +pub(crate) fn from_utf8_path(ob: &PyAny) -> PyResult { PathBuf::extract(ob)? .into_os_string() .into_string() @@ -48,7 +48,7 @@ pub fn from_utf8_path(ob: &PyAny) -> PyResult { .map_err(|s| PyValueError::new_err(format!("{s:?} cannot be encoded to UTF-8"))) } -pub fn from_dataclass(ob: &PyAny) -> PyResult { +pub(crate) fn from_dataclass(ob: &PyAny) -> PyResult { let py = ob.py(); let ob = py.import("dataclasses")?.call_method1("asdict", (ob,))?; @@ -59,7 +59,7 @@ pub fn from_dataclass(ob: &PyAny) -> PyResult { serde_json::from_str(json).into_py_value_result() } -pub fn to_pydantic_voice_model_meta<'py>( +pub(crate) fn to_pydantic_voice_model_meta<'py>( metas: &VoiceModelMeta, py: Python<'py>, ) -> PyResult> { @@ -74,7 +74,7 @@ pub fn to_pydantic_voice_model_meta<'py>( .collect::>>() } -pub fn to_pydantic_dataclass(x: impl Serialize, class: &PyAny) -> PyResult<&PyAny> { +pub(crate) fn to_pydantic_dataclass(x: impl Serialize, class: &PyAny) -> PyResult<&PyAny> { let py = class.py(); let x = serde_json::to_string(&x).into_py_value_result()?; @@ -108,7 +108,7 @@ pub(crate) fn blocking_modify_accent_phrases<'py>( .collect() } -pub fn async_modify_accent_phrases<'py, Fun, Fut>( +pub(crate) fn async_modify_accent_phrases<'py, Fun, Fut>( accent_phrases: &'py PyList, speaker_id: StyleId, py: Python<'py>, @@ -145,16 +145,16 @@ where ) } -pub fn to_rust_uuid(ob: &PyAny) -> PyResult { +pub(crate) fn to_rust_uuid(ob: &PyAny) -> PyResult { let uuid = ob.getattr("hex")?.extract::()?; uuid.parse::().into_py_value_result() } -pub fn to_py_uuid(py: Python<'_>, uuid: Uuid) -> PyResult { +pub(crate) fn to_py_uuid(py: Python<'_>, uuid: Uuid) -> PyResult { let uuid = uuid.hyphenated().to_string(); let uuid = py.import("uuid")?.call_method1("UUID", (uuid,))?; Ok(uuid.to_object(py)) } -pub fn to_rust_user_dict_word(ob: &PyAny) -> PyResult { +pub(crate) fn to_rust_user_dict_word(ob: &PyAny) -> PyResult { voicevox_core::UserDictWord::new( ob.getattr("surface")?.extract()?, ob.getattr("pronunciation")?.extract()?, @@ -164,7 +164,7 @@ pub fn to_rust_user_dict_word(ob: &PyAny) -> PyResult( +pub(crate) fn to_py_user_dict_word<'py>( py: Python<'py>, word: &voicevox_core::UserDictWord, ) -> PyResult<&'py PyAny> { @@ -174,14 +174,14 @@ pub fn to_py_user_dict_word<'py>( .downcast()?; to_pydantic_dataclass(word, class) } -pub fn to_rust_word_type(word_type: &PyAny) -> PyResult { +pub(crate) fn to_rust_word_type(word_type: &PyAny) -> PyResult { let name = word_type.getattr("name")?.extract::()?; serde_json::from_value::(json!(name)).into_py_value_result() } #[ext(VoicevoxCoreResultExt)] -pub impl voicevox_core::Result { +pub(crate) impl voicevox_core::Result { fn into_py_result(self, py: Python<'_>) -> PyResult { use voicevox_core::ErrorKind; diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs index 4cde8d711..4d190333d 100644 --- a/crates/voicevox_core_python_api/src/lib.rs +++ b/crates/voicevox_core_python_api/src/lib.rs @@ -1,25 +1,15 @@ -use std::{marker::PhantomData, sync::Arc}; +use std::marker::PhantomData; mod convert; -use self::convert::{ - async_modify_accent_phrases, from_acceleration_mode, from_dataclass, from_utf8_path, - to_py_user_dict_word, to_py_uuid, to_pydantic_dataclass, to_pydantic_voice_model_meta, - to_rust_user_dict_word, to_rust_uuid, VoicevoxCoreResultExt as _, -}; -use camino::Utf8PathBuf; +use self::convert::{from_utf8_path, to_pydantic_dataclass, VoicevoxCoreResultExt as _}; use easy_ext::ext; use log::debug; use pyo3::{ create_exception, exceptions::{PyException, PyKeyError, PyValueError}, - pyclass, pyfunction, pymethods, pymodule, - types::{IntoPyDict as _, PyBytes, PyDict, PyList, PyModule}, - wrap_pyfunction, PyAny, PyObject, PyRef, PyResult, PyTypeInfo, Python, ToPyObject, -}; -use uuid::Uuid; -use voicevox_core::{ - AccelerationMode, AudioQueryModel, InitializeOptions, StyleId, SynthesisOptions, TtsOptions, - UserDictWord, VoiceModelId, + pyfunction, pymodule, + types::PyModule, + wrap_pyfunction, PyAny, PyResult, PyTypeInfo, Python, }; #[pymodule] @@ -42,10 +32,10 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> { module.add_and_register_submodule(blocking_module)?; let asyncio_module = PyModule::new(py, "voicevox_core._rust.asyncio")?; - asyncio_module.add_class::()?; - asyncio_module.add_class::()?; - asyncio_module.add_class::()?; - asyncio_module.add_class::()?; + asyncio_module.add_class::()?; + asyncio_module.add_class::()?; + asyncio_module.add_class::()?; + asyncio_module.add_class::()?; module.add_and_register_submodule(asyncio_module) } @@ -95,12 +85,6 @@ exceptions! { InvalidWordError: PyValueError; } -#[pyclass] -#[derive(Clone)] -struct VoiceModel { - model: voicevox_core::tokio::VoiceModel, -} - #[pyfunction] fn supported_devices(py: Python<'_>) -> PyResult<&PyAny> { let class = py @@ -111,395 +95,6 @@ fn supported_devices(py: Python<'_>) -> PyResult<&PyAny> { to_pydantic_dataclass(s, class) } -#[pymethods] -impl VoiceModel { - #[staticmethod] - fn from_path( - py: Python<'_>, - #[pyo3(from_py_with = "from_utf8_path")] path: Utf8PathBuf, - ) -> PyResult<&PyAny> { - pyo3_asyncio::tokio::future_into_py(py, async move { - let model = voicevox_core::tokio::VoiceModel::from_path(path).await; - let model = Python::with_gil(|py| model.into_py_result(py))?; - Ok(Self { model }) - }) - } - - #[getter] - fn id(&self) -> &str { - self.model.id().raw_voice_model_id() - } - - #[getter] - fn metas<'py>(&self, py: Python<'py>) -> Vec<&'py PyAny> { - to_pydantic_voice_model_meta(self.model.metas(), py).unwrap() - } -} - -#[pyclass] -#[derive(Clone)] -struct OpenJtalk { - open_jtalk: voicevox_core::tokio::OpenJtalk, -} - -#[pymethods] -impl OpenJtalk { - #[allow(clippy::new_ret_no_self)] - #[staticmethod] - fn new( - #[pyo3(from_py_with = "from_utf8_path")] open_jtalk_dict_dir: Utf8PathBuf, - py: Python<'_>, - ) -> PyResult<&PyAny> { - pyo3_asyncio::tokio::future_into_py(py, async move { - let open_jtalk = voicevox_core::tokio::OpenJtalk::new(open_jtalk_dict_dir).await; - let open_jtalk = Python::with_gil(|py| open_jtalk.into_py_result(py))?; - Ok(Self { open_jtalk }) - }) - } - - fn use_user_dict<'py>(&self, user_dict: UserDict, py: Python<'py>) -> PyResult<&'py PyAny> { - let this = self.open_jtalk.clone(); - - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = this.use_user_dict(&user_dict.dict).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } -} - -#[pyclass] -struct Synthesizer { - synthesizer: Closable, Self>, -} - -#[pymethods] -impl Synthesizer { - #[new] - #[pyo3(signature =( - open_jtalk, - acceleration_mode = InitializeOptions::default().acceleration_mode, - cpu_num_threads = InitializeOptions::default().cpu_num_threads, - ))] - fn new( - open_jtalk: OpenJtalk, - #[pyo3(from_py_with = "from_acceleration_mode")] acceleration_mode: AccelerationMode, - cpu_num_threads: u16, - ) -> PyResult { - let synthesizer = voicevox_core::tokio::Synthesizer::new( - open_jtalk.open_jtalk.clone(), - &InitializeOptions { - acceleration_mode, - cpu_num_threads, - }, - ); - let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?; - let synthesizer = Closable::new(synthesizer); - Ok(Self { synthesizer }) - } - - fn __repr__(&self) -> &'static str { - "Synthesizer { .. }" - } - - fn __enter__(slf: PyRef<'_, Self>) -> PyResult> { - slf.synthesizer.get()?; - Ok(slf) - } - - fn __exit__( - &mut self, - #[allow(unused_variables)] exc_type: &PyAny, - #[allow(unused_variables)] exc_value: &PyAny, - #[allow(unused_variables)] traceback: &PyAny, - ) { - self.close(); - } - - #[getter] - fn is_gpu_mode(&self) -> PyResult { - let synthesizer = self.synthesizer.get()?; - Ok(synthesizer.is_gpu_mode()) - } - - #[getter] - fn metas<'py>(&self, py: Python<'py>) -> PyResult> { - let synthesizer = self.synthesizer.get()?; - to_pydantic_voice_model_meta(&synthesizer.metas(), py) - } - - fn load_voice_model<'py>( - &mut self, - model: &'py PyAny, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let model: VoiceModel = model.extract()?; - let synthesizer = self.synthesizer.get()?.clone(); - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = synthesizer.load_voice_model(&model.model).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } - - fn unload_voice_model(&mut self, voice_model_id: &str, py: Python<'_>) -> PyResult<()> { - self.synthesizer - .get()? - .unload_voice_model(&VoiceModelId::new(voice_model_id.to_string())) - .into_py_result(py) - } - - fn is_loaded_voice_model(&self, voice_model_id: &str) -> PyResult { - Ok(self - .synthesizer - .get()? - .is_loaded_voice_model(&VoiceModelId::new(voice_model_id.to_string()))) - } - - fn audio_query_from_kana<'py>( - &self, - kana: &str, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let kana = kana.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let audio_query = synthesizer - .audio_query_from_kana(&kana, StyleId::new(style_id)) - .await; - - Python::with_gil(|py| { - let class = py.import("voicevox_core")?.getattr("AudioQuery")?; - let ret = to_pydantic_dataclass(audio_query.into_py_result(py)?, class)?; - Ok(ret.to_object(py)) - }) - }, - ) - } - - fn audio_query<'py>(&self, text: &str, style_id: u32, py: Python<'py>) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let text = text.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let audio_query = synthesizer.audio_query(&text, StyleId::new(style_id)).await; - - Python::with_gil(|py| { - let audio_query = audio_query.into_py_result(py)?; - let class = py.import("voicevox_core")?.getattr("AudioQuery")?; - let ret = to_pydantic_dataclass(audio_query, class)?; - Ok(ret.to_object(py)) - }) - }, - ) - } - - fn create_accent_phrases_from_kana<'py>( - &self, - kana: &str, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let kana = kana.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let accent_phrases = synthesizer - .create_accent_phrases_from_kana(&kana, StyleId::new(style_id)) - .await; - Python::with_gil(|py| { - let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; - let accent_phrases = accent_phrases - .into_py_result(py)? - .iter() - .map(|ap| to_pydantic_dataclass(ap, class)) - .collect::>>(); - let list = PyList::new(py, accent_phrases); - Ok(list.to_object(py)) - }) - }, - ) - } - - fn create_accent_phrases<'py>( - &self, - text: &str, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let text = text.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let accent_phrases = synthesizer - .create_accent_phrases(&text, StyleId::new(style_id)) - .await; - Python::with_gil(|py| { - let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; - let accent_phrases = accent_phrases - .into_py_result(py)? - .iter() - .map(|ap| to_pydantic_dataclass(ap, class)) - .collect::>>(); - let list = PyList::new(py, accent_phrases); - Ok(list.to_object(py)) - }) - }, - ) - } - - fn replace_mora_data<'py>( - &self, - accent_phrases: &'py PyList, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - async_modify_accent_phrases( - accent_phrases, - StyleId::new(style_id), - py, - |a, s| async move { synthesizer.replace_mora_data(&a, s).await }, - ) - } - - fn replace_phoneme_length<'py>( - &self, - accent_phrases: &'py PyList, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - async_modify_accent_phrases( - accent_phrases, - StyleId::new(style_id), - py, - |a, s| async move { synthesizer.replace_phoneme_length(&a, s).await }, - ) - } - - fn replace_mora_pitch<'py>( - &self, - accent_phrases: &'py PyList, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - async_modify_accent_phrases( - accent_phrases, - StyleId::new(style_id), - py, - |a, s| async move { synthesizer.replace_mora_pitch(&a, s).await }, - ) - } - - #[pyo3(signature=(audio_query,style_id,enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak))] - fn synthesis<'py>( - &self, - #[pyo3(from_py_with = "from_dataclass")] audio_query: AudioQueryModel, - style_id: u32, - enable_interrogative_upspeak: bool, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let wav = synthesizer - .synthesis( - &audio_query, - StyleId::new(style_id), - &SynthesisOptions { - enable_interrogative_upspeak, - }, - ) - .await; - Python::with_gil(|py| { - let wav = wav.into_py_result(py)?; - Ok(PyBytes::new(py, &wav).to_object(py)) - }) - }, - ) - } - - #[pyo3(signature=( - kana, - style_id, - enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak - ))] - fn tts_from_kana<'py>( - &self, - kana: &str, - style_id: u32, - enable_interrogative_upspeak: bool, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let style_id = StyleId::new(style_id); - let options = TtsOptions { - enable_interrogative_upspeak, - }; - let synthesizer = self.synthesizer.get()?.clone(); - let kana = kana.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let wav = synthesizer.tts_from_kana(&kana, style_id, &options).await; - - Python::with_gil(|py| { - let wav = wav.into_py_result(py)?; - Ok(PyBytes::new(py, &wav).to_object(py)) - }) - }, - ) - } - - #[pyo3(signature=( - text, - style_id, - enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak - ))] - fn tts<'py>( - &self, - text: &str, - style_id: u32, - enable_interrogative_upspeak: bool, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let style_id = StyleId::new(style_id); - let options = TtsOptions { - enable_interrogative_upspeak, - }; - let synthesizer = self.synthesizer.get()?.clone(); - let text = text.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let wav = synthesizer.tts(&text, style_id, &options).await; - - Python::with_gil(|py| { - let wav = wav.into_py_result(py)?; - Ok(PyBytes::new(py, &wav).to_object(py)) - }) - }, - ) - } - - fn close(&mut self) { - self.synthesizer.close() - } -} - struct Closable { content: MaybeClosed, marker: PhantomData, @@ -552,96 +147,13 @@ fn _to_zenkaku(text: &str) -> PyResult { Ok(voicevox_core::__internal::to_zenkaku(text)) } -#[pyclass] -#[derive(Default, Debug, Clone)] -struct UserDict { - dict: Arc, -} - -#[pymethods] -impl UserDict { - #[new] - fn new() -> Self { - Self::default() - } - - fn load<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { - let this = self.dict.clone(); - let path = path.to_owned(); - - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = this.load(&path).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } - - fn save<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { - let this = self.dict.clone(); - let path = path.to_owned(); - - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = this.save(&path).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } - - fn add_word( - &mut self, - #[pyo3(from_py_with = "to_rust_user_dict_word")] word: UserDictWord, - py: Python<'_>, - ) -> PyResult { - let uuid = self.dict.add_word(word).into_py_result(py)?; - - to_py_uuid(py, uuid) - } - - fn update_word( - &mut self, - #[pyo3(from_py_with = "to_rust_uuid")] word_uuid: Uuid, - #[pyo3(from_py_with = "to_rust_user_dict_word")] word: UserDictWord, - py: Python<'_>, - ) -> PyResult<()> { - self.dict.update_word(word_uuid, word).into_py_result(py)?; - Ok(()) - } - - fn remove_word( - &mut self, - #[pyo3(from_py_with = "to_rust_uuid")] word_uuid: Uuid, - py: Python<'_>, - ) -> PyResult<()> { - self.dict.remove_word(word_uuid).into_py_result(py)?; - Ok(()) - } - - fn import_dict(&mut self, other: &UserDict, py: Python<'_>) -> PyResult<()> { - self.dict.import(&other.dict).into_py_result(py)?; - Ok(()) - } - - #[getter] - fn words<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> { - let words = self.dict.with_words(|words| { - words - .iter() - .map(|(&uuid, word)| { - let uuid = to_py_uuid(py, uuid)?; - let word = to_py_user_dict_word(py, word)?; - Ok((uuid, word)) - }) - .collect::>>() - })?; - Ok(words.into_py_dict(py)) - } -} - mod blocking { - use std::sync::Arc; + use std::{path::PathBuf, sync::Arc}; use camino::Utf8PathBuf; use pyo3::{ pyclass, pymethods, - types::{IntoPyDict as _, PyBytes, PyDict, PyList}, + types::{IntoPyDict as _, PyBytes, PyDict, PyList, PyString}, PyAny, PyObject, PyRef, PyResult, Python, }; use uuid::Uuid; @@ -661,10 +173,7 @@ mod blocking { #[pymethods] impl VoiceModel { #[staticmethod] - fn from_path( - py: Python<'_>, - #[pyo3(from_py_with = "crate::convert::from_utf8_path")] path: Utf8PathBuf, - ) -> PyResult { + fn from_path(py: Python<'_>, path: PathBuf) -> PyResult { let model = voicevox_core::blocking::VoiceModel::from_path(path).into_py_result(py)?; Ok(Self { model }) } @@ -786,7 +295,12 @@ mod blocking { .into_py_result(py) } - fn is_loaded_voice_model(&self, voice_model_id: &str) -> PyResult { + // C APIの挙動と一貫性を持たせる。 + fn is_loaded_voice_model(&self, voice_model_id: &PyString) -> PyResult { + let Ok(voice_model_id) = voice_model_id.to_str() else { + // 与えられたIDがUTF-8ではない場合、それに対応する`VoicdModel`は確実に存在しない + return Ok(false); + }; Ok(self .synthesizer .get()? @@ -1057,3 +571,512 @@ mod blocking { } } } + +mod asyncio { + use std::{path::PathBuf, sync::Arc}; + + use camino::Utf8PathBuf; + use pyo3::{ + pyclass, pymethods, + types::{IntoPyDict as _, PyBytes, PyDict, PyList, PyString}, + PyAny, PyObject, PyRef, PyResult, Python, ToPyObject as _, + }; + use uuid::Uuid; + use voicevox_core::{ + AccelerationMode, AudioQueryModel, InitializeOptions, StyleId, SynthesisOptions, + TtsOptions, UserDictWord, VoiceModelId, + }; + + use crate::{convert::VoicevoxCoreResultExt as _, Closable}; + + #[pyclass] + #[derive(Clone)] + pub(crate) struct VoiceModel { + model: voicevox_core::tokio::VoiceModel, + } + + #[pymethods] + impl VoiceModel { + #[staticmethod] + fn from_path(py: Python<'_>, path: PathBuf) -> PyResult<&PyAny> { + pyo3_asyncio::tokio::future_into_py(py, async move { + let model = voicevox_core::tokio::VoiceModel::from_path(path).await; + let model = Python::with_gil(|py| model.into_py_result(py))?; + Ok(Self { model }) + }) + } + + #[getter] + fn id(&self) -> &str { + self.model.id().raw_voice_model_id() + } + + #[getter] + fn metas<'py>(&self, py: Python<'py>) -> Vec<&'py PyAny> { + crate::convert::to_pydantic_voice_model_meta(self.model.metas(), py).unwrap() + } + } + + #[pyclass] + #[derive(Clone)] + pub(crate) struct OpenJtalk { + open_jtalk: voicevox_core::tokio::OpenJtalk, + } + + #[pymethods] + impl OpenJtalk { + #[allow(clippy::new_ret_no_self)] + #[staticmethod] + fn new( + #[pyo3(from_py_with = "crate::convert::from_utf8_path")] + open_jtalk_dict_dir: Utf8PathBuf, + py: Python<'_>, + ) -> PyResult<&PyAny> { + pyo3_asyncio::tokio::future_into_py(py, async move { + let open_jtalk = voicevox_core::tokio::OpenJtalk::new(open_jtalk_dict_dir).await; + let open_jtalk = Python::with_gil(|py| open_jtalk.into_py_result(py))?; + Ok(Self { open_jtalk }) + }) + } + + fn use_user_dict<'py>(&self, user_dict: UserDict, py: Python<'py>) -> PyResult<&'py PyAny> { + let this = self.open_jtalk.clone(); + + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = this.use_user_dict(&user_dict.dict).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + } + + #[pyclass] + pub(crate) struct Synthesizer { + synthesizer: + Closable, Self>, + } + + #[pymethods] + impl Synthesizer { + #[new] + #[pyo3(signature =( + open_jtalk, + acceleration_mode = InitializeOptions::default().acceleration_mode, + cpu_num_threads = InitializeOptions::default().cpu_num_threads, + ))] + fn new( + open_jtalk: OpenJtalk, + #[pyo3(from_py_with = "crate::convert::from_acceleration_mode")] + acceleration_mode: AccelerationMode, + cpu_num_threads: u16, + ) -> PyResult { + let synthesizer = voicevox_core::tokio::Synthesizer::new( + open_jtalk.open_jtalk.clone(), + &InitializeOptions { + acceleration_mode, + cpu_num_threads, + }, + ); + let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?; + let synthesizer = Closable::new(synthesizer); + Ok(Self { synthesizer }) + } + + fn __repr__(&self) -> &'static str { + "Synthesizer { .. }" + } + + fn __enter__(slf: PyRef<'_, Self>) -> PyResult> { + slf.synthesizer.get()?; + Ok(slf) + } + + fn __exit__( + &mut self, + #[allow(unused_variables)] exc_type: &PyAny, + #[allow(unused_variables)] exc_value: &PyAny, + #[allow(unused_variables)] traceback: &PyAny, + ) { + self.close(); + } + + #[getter] + fn is_gpu_mode(&self) -> PyResult { + let synthesizer = self.synthesizer.get()?; + Ok(synthesizer.is_gpu_mode()) + } + + #[getter] + fn metas<'py>(&self, py: Python<'py>) -> PyResult> { + let synthesizer = self.synthesizer.get()?; + crate::convert::to_pydantic_voice_model_meta(&synthesizer.metas(), py) + } + + fn load_voice_model<'py>( + &mut self, + model: &'py PyAny, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let model: VoiceModel = model.extract()?; + let synthesizer = self.synthesizer.get()?.clone(); + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = synthesizer.load_voice_model(&model.model).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + + fn unload_voice_model(&mut self, voice_model_id: &str, py: Python<'_>) -> PyResult<()> { + self.synthesizer + .get()? + .unload_voice_model(&VoiceModelId::new(voice_model_id.to_string())) + .into_py_result(py) + } + + // C APIの挙動と一貫性を持たせる。 + fn is_loaded_voice_model(&self, voice_model_id: &PyString) -> PyResult { + let Ok(voice_model_id) = voice_model_id.to_str() else { + // 与えられたIDがUTF-8ではない場合、それに対応する`VoicdModel`は確実に存在しない + return Ok(false); + }; + Ok(self + .synthesizer + .get()? + .is_loaded_voice_model(&VoiceModelId::new(voice_model_id.to_string()))) + } + + fn audio_query_from_kana<'py>( + &self, + kana: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let kana = kana.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let audio_query = synthesizer + .audio_query_from_kana(&kana, StyleId::new(style_id)) + .await; + + Python::with_gil(|py| { + let class = py.import("voicevox_core")?.getattr("AudioQuery")?; + let ret = crate::convert::to_pydantic_dataclass( + audio_query.into_py_result(py)?, + class, + )?; + Ok(ret.to_object(py)) + }) + }, + ) + } + + fn audio_query<'py>( + &self, + text: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let text = text.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let audio_query = synthesizer.audio_query(&text, StyleId::new(style_id)).await; + + Python::with_gil(|py| { + let audio_query = audio_query.into_py_result(py)?; + let class = py.import("voicevox_core")?.getattr("AudioQuery")?; + let ret = crate::convert::to_pydantic_dataclass(audio_query, class)?; + Ok(ret.to_object(py)) + }) + }, + ) + } + + fn create_accent_phrases_from_kana<'py>( + &self, + kana: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let kana = kana.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let accent_phrases = synthesizer + .create_accent_phrases_from_kana(&kana, StyleId::new(style_id)) + .await; + Python::with_gil(|py| { + let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; + let accent_phrases = accent_phrases + .into_py_result(py)? + .iter() + .map(|ap| crate::convert::to_pydantic_dataclass(ap, class)) + .collect::>>(); + let list = PyList::new(py, accent_phrases); + Ok(list.to_object(py)) + }) + }, + ) + } + + fn create_accent_phrases<'py>( + &self, + text: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let text = text.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let accent_phrases = synthesizer + .create_accent_phrases(&text, StyleId::new(style_id)) + .await; + Python::with_gil(|py| { + let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; + let accent_phrases = accent_phrases + .into_py_result(py)? + .iter() + .map(|ap| crate::convert::to_pydantic_dataclass(ap, class)) + .collect::>>(); + let list = PyList::new(py, accent_phrases); + Ok(list.to_object(py)) + }) + }, + ) + } + + fn replace_mora_data<'py>( + &self, + accent_phrases: &'py PyList, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + crate::convert::async_modify_accent_phrases( + accent_phrases, + StyleId::new(style_id), + py, + |a, s| async move { synthesizer.replace_mora_data(&a, s).await }, + ) + } + + fn replace_phoneme_length<'py>( + &self, + accent_phrases: &'py PyList, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + crate::convert::async_modify_accent_phrases( + accent_phrases, + StyleId::new(style_id), + py, + |a, s| async move { synthesizer.replace_phoneme_length(&a, s).await }, + ) + } + + fn replace_mora_pitch<'py>( + &self, + accent_phrases: &'py PyList, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + crate::convert::async_modify_accent_phrases( + accent_phrases, + StyleId::new(style_id), + py, + |a, s| async move { synthesizer.replace_mora_pitch(&a, s).await }, + ) + } + + #[pyo3(signature=(audio_query,style_id,enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak))] + fn synthesis<'py>( + &self, + #[pyo3(from_py_with = "crate::convert::from_dataclass")] audio_query: AudioQueryModel, + style_id: u32, + enable_interrogative_upspeak: bool, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let wav = synthesizer + .synthesis( + &audio_query, + StyleId::new(style_id), + &SynthesisOptions { + enable_interrogative_upspeak, + }, + ) + .await; + Python::with_gil(|py| { + let wav = wav.into_py_result(py)?; + Ok(PyBytes::new(py, &wav).to_object(py)) + }) + }, + ) + } + + #[pyo3(signature=( + kana, + style_id, + enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak + ))] + fn tts_from_kana<'py>( + &self, + kana: &str, + style_id: u32, + enable_interrogative_upspeak: bool, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let style_id = StyleId::new(style_id); + let options = TtsOptions { + enable_interrogative_upspeak, + }; + let synthesizer = self.synthesizer.get()?.clone(); + let kana = kana.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let wav = synthesizer.tts_from_kana(&kana, style_id, &options).await; + + Python::with_gil(|py| { + let wav = wav.into_py_result(py)?; + Ok(PyBytes::new(py, &wav).to_object(py)) + }) + }, + ) + } + + #[pyo3(signature=( + text, + style_id, + enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak + ))] + fn tts<'py>( + &self, + text: &str, + style_id: u32, + enable_interrogative_upspeak: bool, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let style_id = StyleId::new(style_id); + let options = TtsOptions { + enable_interrogative_upspeak, + }; + let synthesizer = self.synthesizer.get()?.clone(); + let text = text.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let wav = synthesizer.tts(&text, style_id, &options).await; + + Python::with_gil(|py| { + let wav = wav.into_py_result(py)?; + Ok(PyBytes::new(py, &wav).to_object(py)) + }) + }, + ) + } + + fn close(&mut self) { + self.synthesizer.close() + } + } + + #[pyclass] + #[derive(Default, Debug, Clone)] + pub(crate) struct UserDict { + dict: Arc, + } + + #[pymethods] + impl UserDict { + #[new] + fn new() -> Self { + Self::default() + } + + fn load<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { + let this = self.dict.clone(); + let path = path.to_owned(); + + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = this.load(&path).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + + fn save<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { + let this = self.dict.clone(); + let path = path.to_owned(); + + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = this.save(&path).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + + fn add_word( + &mut self, + #[pyo3(from_py_with = "crate::convert::to_rust_user_dict_word")] word: UserDictWord, + py: Python<'_>, + ) -> PyResult { + let uuid = self.dict.add_word(word).into_py_result(py)?; + + crate::convert::to_py_uuid(py, uuid) + } + + fn update_word( + &mut self, + #[pyo3(from_py_with = "crate::convert::to_rust_uuid")] word_uuid: Uuid, + #[pyo3(from_py_with = "crate::convert::to_rust_user_dict_word")] word: UserDictWord, + py: Python<'_>, + ) -> PyResult<()> { + self.dict.update_word(word_uuid, word).into_py_result(py)?; + Ok(()) + } + + fn remove_word( + &mut self, + #[pyo3(from_py_with = "crate::convert::to_rust_uuid")] word_uuid: Uuid, + py: Python<'_>, + ) -> PyResult<()> { + self.dict.remove_word(word_uuid).into_py_result(py)?; + Ok(()) + } + + fn import_dict(&mut self, other: &UserDict, py: Python<'_>) -> PyResult<()> { + self.dict.import(&other.dict).into_py_result(py)?; + Ok(()) + } + + #[getter] + fn words<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> { + let words = self.dict.with_words(|words| { + words + .iter() + .map(|(&uuid, word)| { + let uuid = crate::convert::to_py_uuid(py, uuid)?; + let word = crate::convert::to_py_user_dict_word(py, word)?; + Ok((uuid, word)) + }) + .collect::>>() + })?; + Ok(words.into_py_dict(py)) + } + } +} diff --git a/docs/vvm.md b/docs/vvm.md index 572ffee1a..c2de7fb41 100644 --- a/docs/vvm.md +++ b/docs/vvm.md @@ -1,9 +1,24 @@ # VVM ファイル -音声合成するために必要な onnx モデルファイルなどがまとめられた zip 形式のファイル。 -root パスに確定で`manifest.json`を持つ。 +***VVM ファイル*** は、音声合成に必要な声情報を含むファイルである。 + +より正確には、音声合成のモデル重みファイルなどを含む zip 形式のアーカイブである。拡張子は `.vvm`。 +以下の内部ディレクトリ構造を持つ: + +- `{filename}.vvm` + - `manifest.json` + - `metas.json` + - + - + - + +model は `.onnx` や `.bin` など様々ある。例えば `sample.vvm` は `predict_duration.onnx` / `predict_intonation.onnx` / `decode.onnx` を含む。 + +VOICEVOX OSS が提供する VVM には [`sample.vvm`](https://github.com/VOICEVOX/voicevox_core/tree/main/model) がある。 +製品版 VOICEVOX で利用される VVM は [こちらのレポジトリ](https://github.com/VOICEVOX/voicevox_fat_resource/tree/main/core/model) で確認できる。 ## マニフェストファイル -ファイルの構成や、onnx モデルなどを読み込む・利用するのに必要な情報を記述した json ファイル。 -root パスに`manifest.json`として配置する。 +VVM における ***マニフェストファイル*** は、VVM ファイルの構成や、onnx モデルなどを読み込む・利用するのに必要な情報を記述したファイルである。 +json 形式で記述され、root パスに`manifest.json`として配置する。 +[VOICEVOX CORE のソースコード](https://github.com/VOICEVOX/voicevox_core/blob/main/crates/voicevox_core/src/manifest.rs) 内で `Manifest` 構造体としてスキーマが定義されている。 diff --git a/rust-toolchain b/rust-toolchain index 32a6ce3c7..b1131583c 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.76.0 +1.77.1