diff --git a/crates/voicevox_core/src/__internal/interop.rs b/crates/voicevox_core/src/__internal/interop.rs index d76df9d88..c8cd7101f 100644 --- a/crates/voicevox_core/src/__internal/interop.rs +++ b/crates/voicevox_core/src/__internal/interop.rs @@ -1 +1 @@ -pub use crate::synthesizer::PerformInference; +pub use crate::synthesizer::blocking::PerformInference; diff --git a/crates/voicevox_core/src/engine/open_jtalk.rs b/crates/voicevox_core/src/engine/open_jtalk.rs index e81db51ee..ad1f8c19e 100644 --- a/crates/voicevox_core/src/engine/open_jtalk.rs +++ b/crates/voicevox_core/src/engine/open_jtalk.rs @@ -1,15 +1,4 @@ -use std::io::Write; -use std::sync::Arc; -use std::{path::Path, sync::Mutex}; - -use anyhow::anyhow; -use tempfile::NamedTempFile; - -use ::open_jtalk::{ - mecab_dict_index, text2mecab, JpCommon, ManagedResource, Mecab, Njd, Text2MecabError, -}; - -use crate::error::ErrorRepr; +use ::open_jtalk::Text2MecabError; #[derive(thiserror::Error, Debug)] #[error("`{function}`の実行が失敗しました")] @@ -19,199 +8,218 @@ pub(crate) struct OpenjtalkFunctionError { source: Option, } -struct Resources { - mecab: ManagedResource, - njd: ManagedResource, - jpcommon: ManagedResource, +pub trait FullcontextExtractor: Clone + Send + Sync + 'static { + fn extract_fullcontext(&self, text: &str) -> anyhow::Result>; } -#[allow(unsafe_code)] -unsafe impl Send for Resources {} - -impl self::blocking::OpenJtalk { - pub fn new(open_jtalk_dict_dir: impl AsRef) -> crate::result::Result { - let dict_dir = open_jtalk_dict_dir - .as_ref() - .to_str() - .unwrap_or_else(|| todo!()) // FIXME: `camino::Utf8Path`を要求するようにする - .to_owned(); - - // FIXME: この`{}`はGitのdiffを抑えるためだけに存在 - { - let mut resources = Resources { - mecab: ManagedResource::initialize(), - njd: ManagedResource::initialize(), - jpcommon: ManagedResource::initialize(), - }; +pub(crate) mod blocking { + use std::{ + io::Write as _, + path::Path, + sync::{Arc, Mutex}, + }; - let result = resources.mecab.load(&*dict_dir); - if !result { - // FIXME: 「システム辞書を読もうとしたけど読めなかった」というエラーをちゃんと用意する - return Err(ErrorRepr::NotLoadedOpenjtalkDict.into()); - } + use anyhow::anyhow; + use open_jtalk::{mecab_dict_index, text2mecab, JpCommon, ManagedResource, Mecab, Njd}; + use tempfile::NamedTempFile; - Ok(Self(Arc::new(self::blocking::Inner { - resources: Mutex::new(resources), - dict_dir, - }))) - } - } + use crate::error::ErrorRepr; - /// ユーザー辞書を設定する。 - /// - /// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。 - pub fn use_user_dict( - &self, - user_dict: &crate::blocking::UserDict, - ) -> crate::result::Result<()> { - let words = &user_dict.to_mecab_format(); - self.0.use_user_dict(words) - } -} + use super::{FullcontextExtractor, OpenjtalkFunctionError}; -impl self::tokio::OpenJtalk { - pub async fn new(open_jtalk_dict_dir: impl AsRef) -> crate::result::Result { - let open_jtalk_dict_dir = open_jtalk_dict_dir.as_ref().to_owned(); - let blocking = - crate::task::asyncify(|| self::blocking::OpenJtalk::new(open_jtalk_dict_dir)).await?; - Ok(Self(blocking)) - } + /// テキスト解析器としてのOpen JTalk。 + #[derive(Clone)] + pub struct OpenJtalk(pub(super) Arc); + + impl self::OpenJtalk { + pub fn new(open_jtalk_dict_dir: impl AsRef) -> crate::result::Result { + let dict_dir = open_jtalk_dict_dir + .as_ref() + .to_str() + .unwrap_or_else(|| todo!()) // FIXME: `camino::Utf8Path`を要求するようにする + .to_owned(); + + // FIXME: この`{}`はGitのdiffを抑えるためだけに存在 + { + let mut resources = Resources { + mecab: ManagedResource::initialize(), + njd: ManagedResource::initialize(), + jpcommon: 
ManagedResource::initialize(), + }; + + let result = resources.mecab.load(&*dict_dir); + if !result { + // FIXME: 「システム辞書を読もうとしたけど読めなかった」というエラーをちゃんと用意する + return Err(ErrorRepr::NotLoadedOpenjtalkDict.into()); + } + + Ok(Self(Arc::new(Inner { + resources: Mutex::new(resources), + dict_dir, + }))) + } + } - /// ユーザー辞書を設定する。 - /// - /// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。 - pub async fn use_user_dict( - &self, - user_dict: &crate::tokio::UserDict, - ) -> crate::result::Result<()> { - let inner = self.0 .0.clone(); - let words = user_dict.to_mecab_format(); - crate::task::asyncify(move || inner.use_user_dict(&words)).await + /// ユーザー辞書を設定する。 + /// + /// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。 + pub fn use_user_dict( + &self, + user_dict: &crate::blocking::UserDict, + ) -> crate::result::Result<()> { + let words = &user_dict.to_mecab_format(); + self.0.use_user_dict(words) + } } -} -impl self::blocking::Inner { - // FIXME: 中断可能にする - fn use_user_dict(&self, words: &str) -> crate::result::Result<()> { - let result = { - // ユーザー辞書用のcsvを作成 - let mut temp_csv = - NamedTempFile::new().map_err(|e| ErrorRepr::UseUserDict(e.into()))?; - temp_csv - .write_all(words.as_ref()) - .map_err(|e| ErrorRepr::UseUserDict(e.into()))?; - let temp_csv_path = temp_csv.into_temp_path(); - let temp_dict = NamedTempFile::new().map_err(|e| ErrorRepr::UseUserDict(e.into()))?; - let temp_dict_path = temp_dict.into_temp_path(); - - // Mecabでユーザー辞書をコンパイル - // TODO: エラー(SEGV)が出るパターンを把握し、それをRust側で防ぐ。 - mecab_dict_index(&[ - "mecab-dict-index", - "-d", - &self.dict_dir, - "-u", - temp_dict_path.to_str().unwrap(), - "-f", - "utf-8", - "-t", - "utf-8", - temp_csv_path.to_str().unwrap(), - "-q", - ]); - - let Resources { mecab, .. } = &mut *self.resources.lock().unwrap(); - - mecab.load_with_userdic(self.dict_dir.as_ref(), Some(Path::new(&temp_dict_path))) - }; - - if !result { - return Err(ErrorRepr::UseUserDict(anyhow!("辞書のコンパイルに失敗しました")).into()); + impl FullcontextExtractor for self::OpenJtalk { + fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { + let Resources { + mecab, + njd, + jpcommon, + } = &mut *self.0.resources.lock().unwrap(); + + jpcommon.refresh(); + njd.refresh(); + mecab.refresh(); + + let mecab_text = text2mecab(text).map_err(|e| OpenjtalkFunctionError { + function: "text2mecab", + source: Some(e), + })?; + if mecab.analysis(mecab_text) { + njd.mecab2njd( + mecab.get_feature().ok_or(OpenjtalkFunctionError { + function: "Mecab_get_feature", + source: None, + })?, + mecab.get_size(), + ); + njd.set_pronunciation(); + njd.set_digit(); + njd.set_accent_phrase(); + njd.set_accent_type(); + njd.set_unvoiced_vowel(); + njd.set_long_vowel(); + jpcommon.njd2jpcommon(njd); + jpcommon.make_label(); + jpcommon + .get_label_feature_to_iter() + .ok_or(OpenjtalkFunctionError { + function: "JPCommon_get_label_feature", + source: None, + }) + .map(|iter| iter.map(|s| s.to_string()).collect()) + .map_err(Into::into) + } else { + Err(OpenjtalkFunctionError { + function: "Mecab_analysis", + source: None, + } + .into()) + } } + } - Ok(()) + pub(super) struct Inner { + resources: std::sync::Mutex, + dict_dir: String, // FIXME: `camino::Utf8PathBuf`にする } -} -pub trait FullcontextExtractor: Clone + Send + Sync + 'static { - fn extract_fullcontext(&self, text: &str) -> anyhow::Result>; -} + impl Inner { + // FIXME: 中断可能にする + pub(super) fn use_user_dict(&self, words: &str) -> crate::result::Result<()> { + let result = { + // ユーザー辞書用のcsvを作成 + let mut temp_csv = + NamedTempFile::new().map_err(|e| 
ErrorRepr::UseUserDict(e.into()))?; + temp_csv + .write_all(words.as_ref()) + .map_err(|e| ErrorRepr::UseUserDict(e.into()))?; + let temp_csv_path = temp_csv.into_temp_path(); + let temp_dict = + NamedTempFile::new().map_err(|e| ErrorRepr::UseUserDict(e.into()))?; + let temp_dict_path = temp_dict.into_temp_path(); + + // Mecabでユーザー辞書をコンパイル + // TODO: エラー(SEGV)が出るパターンを把握し、それをRust側で防ぐ。 + mecab_dict_index(&[ + "mecab-dict-index", + "-d", + &self.dict_dir, + "-u", + temp_dict_path.to_str().unwrap(), + "-f", + "utf-8", + "-t", + "utf-8", + temp_csv_path.to_str().unwrap(), + "-q", + ]); + + let Resources { mecab, .. } = &mut *self.resources.lock().unwrap(); + + mecab.load_with_userdic(self.dict_dir.as_ref(), Some(Path::new(&temp_dict_path))) + }; -impl FullcontextExtractor for self::blocking::OpenJtalk { - fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { - let Resources { - mecab, - njd, - jpcommon, - } = &mut *self.0.resources.lock().unwrap(); - - jpcommon.refresh(); - njd.refresh(); - mecab.refresh(); - - let mecab_text = text2mecab(text).map_err(|e| OpenjtalkFunctionError { - function: "text2mecab", - source: Some(e), - })?; - if mecab.analysis(mecab_text) { - njd.mecab2njd( - mecab.get_feature().ok_or(OpenjtalkFunctionError { - function: "Mecab_get_feature", - source: None, - })?, - mecab.get_size(), - ); - njd.set_pronunciation(); - njd.set_digit(); - njd.set_accent_phrase(); - njd.set_accent_type(); - njd.set_unvoiced_vowel(); - njd.set_long_vowel(); - jpcommon.njd2jpcommon(njd); - jpcommon.make_label(); - jpcommon - .get_label_feature_to_iter() - .ok_or(OpenjtalkFunctionError { - function: "JPCommon_get_label_feature", - source: None, - }) - .map(|iter| iter.map(|s| s.to_string()).collect()) - .map_err(Into::into) - } else { - Err(OpenjtalkFunctionError { - function: "Mecab_analysis", - source: None, + if !result { + return Err( + ErrorRepr::UseUserDict(anyhow!("辞書のコンパイルに失敗しました")).into(), + ); } - .into()) + + Ok(()) } } -} -impl FullcontextExtractor for self::tokio::OpenJtalk { - fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { - self.0.extract_fullcontext(text) + struct Resources { + mecab: ManagedResource, + njd: ManagedResource, + jpcommon: ManagedResource, } + + // FIXME: open_jtalk-rs側で宣言する + #[allow(unsafe_code)] + unsafe impl Send for Resources {} } -pub(crate) mod blocking { - use std::sync::Arc; +pub(crate) mod tokio { + use std::path::Path; - use super::Resources; + use super::FullcontextExtractor; /// テキスト解析器としてのOpen JTalk。 #[derive(Clone)] - pub struct OpenJtalk(pub(super) Arc); + pub struct OpenJtalk(super::blocking::OpenJtalk); + + impl self::OpenJtalk { + pub async fn new(open_jtalk_dict_dir: impl AsRef) -> crate::result::Result { + let open_jtalk_dict_dir = open_jtalk_dict_dir.as_ref().to_owned(); + let blocking = + crate::task::asyncify(|| super::blocking::OpenJtalk::new(open_jtalk_dict_dir)) + .await?; + Ok(Self(blocking)) + } - pub(super) struct Inner { - pub(super) resources: std::sync::Mutex, - pub(super) dict_dir: String, // FIXME: `camino::Utf8PathBuf`にする + /// ユーザー辞書を設定する。 + /// + /// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。 + pub async fn use_user_dict( + &self, + user_dict: &crate::tokio::UserDict, + ) -> crate::result::Result<()> { + let inner = self.0 .0.clone(); + let words = user_dict.to_mecab_format(); + crate::task::asyncify(move || inner.use_user_dict(&words)).await + } } -} -pub(crate) mod tokio { - /// テキスト解析器としてのOpen JTalk。 - #[derive(Clone)] - pub struct OpenJtalk(pub(super) super::blocking::OpenJtalk); + impl 
FullcontextExtractor for self::OpenJtalk { + fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { + self.0.extract_fullcontext(text) + } + } } #[cfg(test)] diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index b2dd02764..202e917c7 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -1,25 +1,4 @@ -use std::io::{Cursor, Write as _}; - -use enum_map::enum_map; - -use crate::{ - engine::{ - self, create_kana, parse_kana, AccentPhraseModel, FullcontextExtractor, MoraModel, - OjtPhoneme, Utterance, - }, - error::ErrorRepr, - infer::{ - domain::{ - DecodeInput, DecodeOutput, InferenceOperationImpl, PredictDurationInput, - PredictDurationOutput, PredictIntonationInput, PredictIntonationOutput, - }, - runtimes::Onnxruntime, - status::Status, - InferenceSessionOptions, - }, - numerics::F32Ext as _, - AudioQueryModel, Result, StyleId, SupportedDevices, VoiceModelId, VoiceModelMeta, -}; +use crate::infer::runtimes::Onnxruntime; /// [`blocking::Synthesizer::synthesis`]および[`tokio::Synthesizer::synthesis`]のオプション。 /// @@ -89,1290 +68,1322 @@ pub struct InitializeOptions { pub cpu_num_threads: u16, } -const DEFAULT_SAMPLING_RATE: u32 = 24000; - pub(crate) type InferenceRuntimeImpl = Onnxruntime; -// FIXME: docを書く -impl self::tokio::Synthesizer { - pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { - self::blocking::Synthesizer::new(open_jtalk, options) - .map(Into::into) - .map(Self) - } - - pub fn is_gpu_mode(&self) -> bool { - self.0.is_gpu_mode() - } - - pub async fn load_voice_model(&self, model: &crate::tokio::VoiceModel) -> Result<()> { - let model_bytes = &model.read_inference_models().await?; - self.0.status.insert_model(model.header(), model_bytes) - } - - pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { - self.0.unload_voice_model(voice_model_id) - } - - pub fn is_loaded_voice_model(&self, voice_model_id: &VoiceModelId) -> bool { - self.0.is_loaded_voice_model(voice_model_id) - } - - #[doc(hidden)] - pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { - self.0.is_loaded_model_by_style_id(style_id) - } - - pub fn metas(&self) -> VoiceModelMeta { - self.0.metas() - } - - pub async fn synthesis( - &self, - audio_query: &AudioQueryModel, - style_id: StyleId, - options: &SynthesisOptions, - ) -> Result> { - let blocking = self.0.clone(); - let audio_query = audio_query.clone(); - let options = options.clone(); - - crate::task::asyncify(move || blocking.synthesis(&audio_query, style_id, &options)).await - } - - pub async fn create_accent_phrases_from_kana( - &self, - kana: &str, - style_id: StyleId, - ) -> Result> { - let blocking = self.0.clone(); - let kana = kana.to_owned(); - - crate::task::asyncify(move || blocking.create_accent_phrases_from_kana(&kana, style_id)) - .await - } - - pub async fn replace_mora_data( - &self, - accent_phrases: &[AccentPhraseModel], - style_id: StyleId, - ) -> Result> { - let blocking = self.0.clone(); - let accent_phrases = accent_phrases.to_owned(); - - crate::task::asyncify(move || blocking.replace_mora_data(&accent_phrases, style_id)).await - } - - pub async fn replace_phoneme_length( - &self, - accent_phrases: &[AccentPhraseModel], - style_id: StyleId, - ) -> Result> { - let blocking = self.0.clone(); - let accent_phrases = accent_phrases.to_owned(); +pub(crate) mod blocking { + // FIXME: ここのdocのコードブロックはasync版のものなので、`tokio`モジュールの方に移した上で、 + // (ブロッキング版をpublic APIにするならの話ではあるが)ブロッキング版はブロッキング版でコード例 
+ // を用意する - crate::task::asyncify(move || blocking.replace_phoneme_length(&accent_phrases, style_id)) - .await - } + use std::io::{Cursor, Write as _}; - pub async fn replace_mora_pitch( - &self, - accent_phrases: &[AccentPhraseModel], - style_id: StyleId, - ) -> Result> { - let blocking = self.0.clone(); - let accent_phrases = accent_phrases.to_owned(); + use enum_map::enum_map; - crate::task::asyncify(move || blocking.replace_mora_pitch(&accent_phrases, style_id)).await - } + use crate::{ + engine::{self, create_kana, parse_kana, MoraModel, OjtPhoneme, Utterance}, + error::ErrorRepr, + infer::{ + domain::{ + DecodeInput, DecodeOutput, InferenceDomainImpl, InferenceOperationImpl, + PredictDurationInput, PredictDurationOutput, PredictIntonationInput, + PredictIntonationOutput, + }, + status::Status, + InferenceSessionOptions, + }, + numerics::F32Ext as _, + AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId, + SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta, + }; - pub async fn audio_query_from_kana( - &self, - kana: &str, - style_id: StyleId, - ) -> Result { - let blocking = self.0.clone(); - let kana = kana.to_owned(); + use super::{AccelerationMode, InferenceRuntimeImpl, InitializeOptions, TtsOptions}; - crate::task::asyncify(move || blocking.audio_query_from_kana(&kana, style_id)).await - } + const DEFAULT_SAMPLING_RATE: u32 = 24000; - pub async fn tts_from_kana( - &self, - kana: &str, - style_id: StyleId, - options: &TtsOptions, - ) -> Result> { - let blocking = self.0.clone(); - let kana = kana.to_owned(); - let options = options.clone(); - - crate::task::asyncify(move || blocking.tts_from_kana(&kana, style_id, &options)).await - } -} + /// 音声シンセサイザ。 + pub struct Synthesizer { + pub(super) status: Status, + open_jtalk: O, + use_gpu: bool, + } + + impl self::Synthesizer { + /// `Synthesizer`をコンストラクトする。 + /// + /// # Example + /// + #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 + #[cfg_attr(not(windows), doc = "```")] + /// # #[tokio::main] + /// # async fn main() -> anyhow::Result<()> { + /// # use test_util::OPEN_JTALK_DIC_DIR; + /// # + /// # const ACCELERATION_MODE: AccelerationMode = AccelerationMode::Cpu; + /// # + /// use std::sync::Arc; + /// + /// use voicevox_core::{ + /// tokio::{OpenJtalk, Synthesizer}, + /// AccelerationMode, InitializeOptions, + /// }; + /// + /// let mut syntesizer = Synthesizer::new( + /// Arc::new(OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap()), + /// &InitializeOptions { + /// acceleration_mode: ACCELERATION_MODE, + /// ..Default::default() + /// }, + /// )?; + /// # + /// # Ok(()) + /// # } + /// ``` + pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { + #[cfg(windows)] + list_windows_video_cards(); + + let use_gpu = match options.acceleration_mode { + AccelerationMode::Auto => { + let supported_devices = SupportedDevices::create()?; + + if cfg!(feature = "directml") { + *supported_devices.dml() + } else { + *supported_devices.cuda() + } + } + AccelerationMode::Cpu => false, + AccelerationMode::Gpu => true, + }; -impl self::tokio::Synthesizer { - pub async fn create_accent_phrases( - &self, - text: &str, - style_id: StyleId, - ) -> Result> { - let blocking = self.0.clone(); - let text = text.to_owned(); + if use_gpu && !can_support_gpu_feature()? 
{ + return Err(ErrorRepr::GpuSupport.into()); + } - crate::task::asyncify(move || blocking.create_accent_phrases(&text, style_id)).await - } + // 軽いモデルはこちらを使う + let light_session_options = + InferenceSessionOptions::new(options.cpu_num_threads, false); - pub async fn audio_query(&self, text: &str, style_id: StyleId) -> Result { - let blocking = self.0.clone(); - let text = text.to_owned(); + // 重いモデルはこちらを使う + let heavy_session_options = + InferenceSessionOptions::new(options.cpu_num_threads, use_gpu); - crate::task::asyncify(move || blocking.audio_query(&text, style_id)).await - } + let status = Status::new(enum_map! { + InferenceOperationImpl::PredictDuration + | InferenceOperationImpl::PredictIntonation => light_session_options, + InferenceOperationImpl::Decode => heavy_session_options, + }); - pub async fn tts( - &self, - text: &str, - style_id: StyleId, - options: &TtsOptions, - ) -> Result> { - let blocking = self.0.clone(); - let text = text.to_owned(); - let options = options.clone(); - - crate::task::asyncify(move || blocking.tts(&text, style_id, &options)).await - } -} + return Ok(Self { + status, + open_jtalk, + use_gpu, + }); -// FIXME: コードのdiffを抑えるため`impl blocking::Synthesizer`と -// `impl blocking::Synthesizer`がそれぞれ3つ誕生しているので、一つずつにまとめる - -// FIXME: ここのdocのコードブロックはasync版のものなので、↑の方に移した上で、(ブロッキング版を -// public APIにするならの話ではあるが)ブロッキング版はブロッキング版でコード例を用意する -impl self::blocking::Synthesizer { - /// `Synthesizer`をコンストラクトする。 - /// - /// # Example - /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] - /// # #[tokio::main] - /// # async fn main() -> anyhow::Result<()> { - /// # use test_util::OPEN_JTALK_DIC_DIR; - /// # - /// # const ACCELERATION_MODE: AccelerationMode = AccelerationMode::Cpu; - /// # - /// use std::sync::Arc; - /// - /// use voicevox_core::{ - /// tokio::{OpenJtalk, Synthesizer}, - /// AccelerationMode, InitializeOptions, - /// }; - /// - /// let mut syntesizer = Synthesizer::new( - /// Arc::new(OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap()), - /// &InitializeOptions { - /// acceleration_mode: ACCELERATION_MODE, - /// ..Default::default() - /// }, - /// )?; - /// # - /// # Ok(()) - /// # } - /// ``` - pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { - #[cfg(windows)] - list_windows_video_cards(); - - let use_gpu = match options.acceleration_mode { - AccelerationMode::Auto => { + fn can_support_gpu_feature() -> Result { let supported_devices = SupportedDevices::create()?; if cfg!(feature = "directml") { - *supported_devices.dml() + Ok(*supported_devices.dml()) } else { - *supported_devices.cuda() + Ok(*supported_devices.cuda()) } } - AccelerationMode::Cpu => false, - AccelerationMode::Gpu => true, - }; - - if use_gpu && !can_support_gpu_feature()? { - return Err(ErrorRepr::GpuSupport.into()); } - // 軽いモデルはこちらを使う - let light_session_options = InferenceSessionOptions::new(options.cpu_num_threads, false); - - // 重いモデルはこちらを使う - let heavy_session_options = InferenceSessionOptions::new(options.cpu_num_threads, use_gpu); - - let status = Status::new(enum_map! 
{ - InferenceOperationImpl::PredictDuration - | InferenceOperationImpl::PredictIntonation => light_session_options, - InferenceOperationImpl::Decode => heavy_session_options, - }); - - return Ok(Self { - status, - open_jtalk, - use_gpu, - }); + /// ハードウェアアクセラレーションがGPUモードか判定する。 + pub fn is_gpu_mode(&self) -> bool { + self.use_gpu + } - fn can_support_gpu_feature() -> Result { - let supported_devices = SupportedDevices::create()?; + /// 音声モデルを読み込む。 + pub fn load_voice_model(&self, model: &crate::blocking::VoiceModel) -> Result<()> { + let model_bytes = &model.read_inference_models()?; + self.status.insert_model(model.header(), model_bytes) + } - if cfg!(feature = "directml") { - Ok(*supported_devices.dml()) - } else { - Ok(*supported_devices.cuda()) - } + /// 音声モデルの読み込みを解除する。 + pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { + self.status.unload_model(voice_model_id) } - } - /// ハードウェアアクセラレーションがGPUモードか判定する。 - pub fn is_gpu_mode(&self) -> bool { - self.use_gpu - } + /// 指定したIDの音声モデルが読み込まれているか判定する。 + pub fn is_loaded_voice_model(&self, voice_model_id: &VoiceModelId) -> bool { + self.status.is_loaded_model(voice_model_id) + } - /// 音声モデルを読み込む。 - pub fn load_voice_model(&self, model: &crate::blocking::VoiceModel) -> Result<()> { - let model_bytes = &model.read_inference_models()?; - self.status.insert_model(model.header(), model_bytes) - } + #[doc(hidden)] + pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { + self.status.is_loaded_model_by_style_id(style_id) + } - /// 音声モデルの読み込みを解除する。 - pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { - self.status.unload_model(voice_model_id) - } + /// 今読み込んでいる音声モデルのメタ情報を返す。 + pub fn metas(&self) -> VoiceModelMeta { + self.status.metas() + } - /// 指定したIDの音声モデルが読み込まれているか判定する。 - pub fn is_loaded_voice_model(&self, voice_model_id: &VoiceModelId) -> bool { - self.status.is_loaded_model(voice_model_id) - } + /// AudioQueryから音声合成を行う。 + pub fn synthesis( + &self, + audio_query: &AudioQueryModel, + style_id: StyleId, + options: &SynthesisOptions, + ) -> Result> { + let speed_scale = *audio_query.speed_scale(); + let pitch_scale = *audio_query.pitch_scale(); + let intonation_scale = *audio_query.intonation_scale(); + let pre_phoneme_length = *audio_query.pre_phoneme_length(); + let post_phoneme_length = *audio_query.post_phoneme_length(); + + let accent_phrases = if options.enable_interrogative_upspeak { + adjust_interrogative_accent_phrases(audio_query.accent_phrases().as_slice()) + } else { + audio_query.accent_phrases().clone() + }; - #[doc(hidden)] - pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { - self.status.is_loaded_model_by_style_id(style_id) - } + let (flatten_moras, phoneme_data_list) = initial_process(&accent_phrases); - /// 今読み込んでいる音声モデルのメタ情報を返す。 - pub fn metas(&self) -> VoiceModelMeta { - self.status.metas() - } + let mut phoneme_length_list = vec![pre_phoneme_length]; + let mut f0_list = vec![0.]; + let mut voiced_list = vec![false]; + { + let mut sum_of_f0_bigger_than_zero = 0.; + let mut count_of_f0_bigger_than_zero = 0; - /// AudioQueryから音声合成を行う。 - pub fn synthesis( - &self, - audio_query: &AudioQueryModel, - style_id: StyleId, - options: &SynthesisOptions, - ) -> Result> { - let speed_scale = *audio_query.speed_scale(); - let pitch_scale = *audio_query.pitch_scale(); - let intonation_scale = *audio_query.intonation_scale(); - let pre_phoneme_length = *audio_query.pre_phoneme_length(); - let post_phoneme_length = 
*audio_query.post_phoneme_length(); - - let accent_phrases = if options.enable_interrogative_upspeak { - adjust_interrogative_accent_phrases(audio_query.accent_phrases().as_slice()) - } else { - audio_query.accent_phrases().clone() - }; + for mora in flatten_moras { + let consonant_length = *mora.consonant_length(); + let vowel_length = *mora.vowel_length(); + let pitch = *mora.pitch(); - let (flatten_moras, phoneme_data_list) = initial_process(&accent_phrases); + if let Some(consonant_length) = consonant_length { + phoneme_length_list.push(consonant_length); + } + phoneme_length_list.push(vowel_length); - let mut phoneme_length_list = vec![pre_phoneme_length]; - let mut f0_list = vec![0.]; - let mut voiced_list = vec![false]; - { - let mut sum_of_f0_bigger_than_zero = 0.; - let mut count_of_f0_bigger_than_zero = 0; + let f0_single = pitch * 2.0_f32.powf(pitch_scale); + f0_list.push(f0_single); - for mora in flatten_moras { - let consonant_length = *mora.consonant_length(); - let vowel_length = *mora.vowel_length(); - let pitch = *mora.pitch(); + let bigger_than_zero = f0_single > 0.; + voiced_list.push(bigger_than_zero); - if let Some(consonant_length) = consonant_length { - phoneme_length_list.push(consonant_length); + if bigger_than_zero { + sum_of_f0_bigger_than_zero += f0_single; + count_of_f0_bigger_than_zero += 1; + } } - phoneme_length_list.push(vowel_length); - - let f0_single = pitch * 2.0_f32.powf(pitch_scale); - f0_list.push(f0_single); + phoneme_length_list.push(post_phoneme_length); + f0_list.push(0.); + voiced_list.push(false); + let mean_f0 = sum_of_f0_bigger_than_zero / (count_of_f0_bigger_than_zero as f32); + + if !mean_f0.is_nan() { + for i in 0..f0_list.len() { + if voiced_list[i] { + f0_list[i] = (f0_list[i] - mean_f0) * intonation_scale + mean_f0; + } + } + } + } - let bigger_than_zero = f0_single > 0.; - voiced_list.push(bigger_than_zero); + let (_, _, vowel_indexes) = split_mora(&phoneme_data_list); - if bigger_than_zero { - sum_of_f0_bigger_than_zero += f0_single; - count_of_f0_bigger_than_zero += 1; + let mut phoneme: Vec> = Vec::new(); + let mut f0: Vec = Vec::new(); + { + const RATE: f32 = 24000. 
/ 256.; + let mut sum_of_phoneme_length = 0; + let mut count_of_f0 = 0; + let mut vowel_indexes_index = 0; + + for (i, phoneme_length) in phoneme_length_list.iter().enumerate() { + // VOICEVOX ENGINEと挙動を合わせるため、四捨五入ではなく偶数丸めをする + // + // https://github.com/VOICEVOX/voicevox_engine/issues/552 + let phoneme_length = ((*phoneme_length * RATE).round_ties_even_() / speed_scale) + .round_ties_even_() as usize; + let phoneme_id = phoneme_data_list[i].phoneme_id(); + + for _ in 0..phoneme_length { + let mut phonemes_vec = vec![0.; OjtPhoneme::num_phoneme()]; + phonemes_vec[phoneme_id as usize] = 1.; + phoneme.push(phonemes_vec) + } + sum_of_phoneme_length += phoneme_length; + + if i as i64 == vowel_indexes[vowel_indexes_index] { + for _ in 0..sum_of_phoneme_length { + f0.push(f0_list[count_of_f0]); + } + count_of_f0 += 1; + sum_of_phoneme_length = 0; + vowel_indexes_index += 1; + } } } - phoneme_length_list.push(post_phoneme_length); - f0_list.push(0.); - voiced_list.push(false); - let mean_f0 = sum_of_f0_bigger_than_zero / (count_of_f0_bigger_than_zero as f32); - - if !mean_f0.is_nan() { - for i in 0..f0_list.len() { - if voiced_list[i] { - f0_list[i] = (f0_list[i] - mean_f0) * intonation_scale + mean_f0; + + // 2次元のvectorを1次元に変換し、アドレスを連続させる + let flatten_phoneme = phoneme.into_iter().flatten().collect::>(); + + let wave = &self.decode( + f0.len(), + OjtPhoneme::num_phoneme(), + &f0, + &flatten_phoneme, + style_id, + )?; + return Ok(to_wav(wave, audio_query)); + + fn adjust_interrogative_accent_phrases( + accent_phrases: &[AccentPhraseModel], + ) -> Vec { + accent_phrases + .iter() + .map(|accent_phrase| { + AccentPhraseModel::new( + adjust_interrogative_moras(accent_phrase), + *accent_phrase.accent(), + accent_phrase.pause_mora().clone(), + *accent_phrase.is_interrogative(), + ) + }) + .collect() + } + + fn adjust_interrogative_moras(accent_phrase: &AccentPhraseModel) -> Vec { + let moras = accent_phrase.moras(); + if *accent_phrase.is_interrogative() && !moras.is_empty() { + let last_mora = moras.last().unwrap(); + let last_mora_pitch = *last_mora.pitch(); + if last_mora_pitch != 0.0 { + let mut new_moras: Vec = Vec::with_capacity(moras.len() + 1); + new_moras.extend_from_slice(moras.as_slice()); + let interrogative_mora = make_interrogative_mora(last_mora); + new_moras.push(interrogative_mora); + return new_moras; } } + moras.clone() } - } - let (_, _, vowel_indexes) = split_mora(&phoneme_data_list); + fn make_interrogative_mora(last_mora: &MoraModel) -> MoraModel { + const FIX_VOWEL_LENGTH: f32 = 0.15; + const ADJUST_PITCH: f32 = 0.3; + const MAX_PITCH: f32 = 6.5; - let mut phoneme: Vec> = Vec::new(); - let mut f0: Vec = Vec::new(); - { - const RATE: f32 = 24000. 
/ 256.; - let mut sum_of_phoneme_length = 0; - let mut count_of_f0 = 0; - let mut vowel_indexes_index = 0; - - for (i, phoneme_length) in phoneme_length_list.iter().enumerate() { - // VOICEVOX ENGINEと挙動を合わせるため、四捨五入ではなく偶数丸めをする - // - // https://github.com/VOICEVOX/voicevox_engine/issues/552 - let phoneme_length = ((*phoneme_length * RATE).round_ties_even_() / speed_scale) - .round_ties_even_() as usize; - let phoneme_id = phoneme_data_list[i].phoneme_id(); - - for _ in 0..phoneme_length { - let mut phonemes_vec = vec![0.; OjtPhoneme::num_phoneme()]; - phonemes_vec[phoneme_id as usize] = 1.; - phoneme.push(phonemes_vec) - } - sum_of_phoneme_length += phoneme_length; + let pitch = (*last_mora.pitch() + ADJUST_PITCH).min(MAX_PITCH); - if i as i64 == vowel_indexes[vowel_indexes_index] { - for _ in 0..sum_of_phoneme_length { - f0.push(f0_list[count_of_f0]); + MoraModel::new( + mora_to_text(last_mora.vowel()), + None, + None, + last_mora.vowel().clone(), + FIX_VOWEL_LENGTH, + pitch, + ) + } + + fn to_wav(wave: &[f32], audio_query: &AudioQueryModel) -> Vec { + let volume_scale = *audio_query.volume_scale(); + let output_stereo = *audio_query.output_stereo(); + let output_sampling_rate = *audio_query.output_sampling_rate(); + + // TODO: 44.1kHzなどの対応 + + let num_channels: u16 = if output_stereo { 2 } else { 1 }; + let bit_depth: u16 = 16; + let repeat_count: u32 = + (output_sampling_rate / DEFAULT_SAMPLING_RATE) * num_channels as u32; + let block_size: u16 = bit_depth * num_channels / 8; + + let bytes_size = wave.len() as u32 * repeat_count * 2; + let wave_size = bytes_size + 44; + + let buf: Vec = Vec::with_capacity(wave_size as usize); + let mut cur = Cursor::new(buf); + + cur.write_all("RIFF".as_bytes()).unwrap(); + cur.write_all(&(wave_size - 8).to_le_bytes()).unwrap(); + cur.write_all("WAVEfmt ".as_bytes()).unwrap(); + cur.write_all(&16_u32.to_le_bytes()).unwrap(); // fmt header length + cur.write_all(&1_u16.to_le_bytes()).unwrap(); //linear PCM + cur.write_all(&num_channels.to_le_bytes()).unwrap(); + cur.write_all(&output_sampling_rate.to_le_bytes()).unwrap(); + + let block_rate = output_sampling_rate * block_size as u32; + + cur.write_all(&block_rate.to_le_bytes()).unwrap(); + cur.write_all(&block_size.to_le_bytes()).unwrap(); + cur.write_all(&bit_depth.to_le_bytes()).unwrap(); + cur.write_all("data".as_bytes()).unwrap(); + cur.write_all(&bytes_size.to_le_bytes()).unwrap(); + + for value in wave { + let v = (value * volume_scale).clamp(-1., 1.); + let data = (v * 0x7fff as f32) as i16; + for _ in 0..repeat_count { + cur.write_all(&data.to_le_bytes()).unwrap(); } - count_of_f0 += 1; - sum_of_phoneme_length = 0; - vowel_indexes_index += 1; } + + cur.into_inner() } } - // 2次元のvectorを1次元に変換し、アドレスを連続させる - let flatten_phoneme = phoneme.into_iter().flatten().collect::>(); + /// AquesTalk風記法からAccentPhrase (アクセント句)の配列を生成する。 + /// + /// # Example + /// + #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 + #[cfg_attr(not(windows), doc = "```")] + /// # #[tokio::main] + /// # async fn main() -> anyhow::Result<()> { + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .await?; + /// # + /// use voicevox_core::StyleId; + /// + /// let accent_phrases = synthesizer + /// .create_accent_phrases_from_kana("コンニチワ'", StyleId::new(302)) + /// .await?; + /// # + /// # Ok(()) + /// # } + /// ``` + pub fn create_accent_phrases_from_kana( + &self, + 
kana: &str, + style_id: StyleId, + ) -> Result> { + self.replace_mora_data(&parse_kana(kana)?, style_id) + } - let wave = &self.decode( - f0.len(), - OjtPhoneme::num_phoneme(), - &f0, - &flatten_phoneme, - style_id, - )?; - return Ok(to_wav(wave, audio_query)); + /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 + pub fn replace_mora_data( + &self, + accent_phrases: &[AccentPhraseModel], + style_id: StyleId, + ) -> Result> { + let accent_phrases = self.replace_phoneme_length(accent_phrases, style_id)?; + self.replace_mora_pitch(&accent_phrases, style_id) + } - fn adjust_interrogative_accent_phrases( + /// AccentPhraseの配列の音素長を、特定の声で生成しなおす。 + pub fn replace_phoneme_length( + &self, accent_phrases: &[AccentPhraseModel], - ) -> Vec { - accent_phrases + style_id: StyleId, + ) -> Result> { + let (_, phoneme_data_list) = initial_process(accent_phrases); + + let (_, _, vowel_indexes_data) = split_mora(&phoneme_data_list); + + let phoneme_list_s: Vec = phoneme_data_list + .iter() + .map(|phoneme_data| phoneme_data.phoneme_id()) + .collect(); + let phoneme_length = self.predict_duration(&phoneme_list_s, style_id)?; + + let mut index = 0; + let new_accent_phrases = accent_phrases .iter() .map(|accent_phrase| { AccentPhraseModel::new( - adjust_interrogative_moras(accent_phrase), + accent_phrase + .moras() + .iter() + .map(|mora| { + let new_mora = MoraModel::new( + mora.text().clone(), + mora.consonant().clone(), + mora.consonant().as_ref().map(|_| { + phoneme_length[vowel_indexes_data[index + 1] as usize - 1] + }), + mora.vowel().clone(), + phoneme_length[vowel_indexes_data[index + 1] as usize], + *mora.pitch(), + ); + index += 1; + new_mora + }) + .collect(), *accent_phrase.accent(), - accent_phrase.pause_mora().clone(), + accent_phrase.pause_mora().as_ref().map(|pause_mora| { + let new_pause_mora = MoraModel::new( + pause_mora.text().clone(), + pause_mora.consonant().clone(), + *pause_mora.consonant_length(), + pause_mora.vowel().clone(), + phoneme_length[vowel_indexes_data[index + 1] as usize], + *pause_mora.pitch(), + ); + index += 1; + new_pause_mora + }), *accent_phrase.is_interrogative(), ) }) - .collect() + .collect(); + + Ok(new_accent_phrases) } - fn adjust_interrogative_moras(accent_phrase: &AccentPhraseModel) -> Vec { - let moras = accent_phrase.moras(); - if *accent_phrase.is_interrogative() && !moras.is_empty() { - let last_mora = moras.last().unwrap(); - let last_mora_pitch = *last_mora.pitch(); - if last_mora_pitch != 0.0 { - let mut new_moras: Vec = Vec::with_capacity(moras.len() + 1); - new_moras.extend_from_slice(moras.as_slice()); - let interrogative_mora = make_interrogative_mora(last_mora); - new_moras.push(interrogative_mora); - return new_moras; + /// AccentPhraseの配列の音高を、特定の声で生成しなおす。 + pub fn replace_mora_pitch( + &self, + accent_phrases: &[AccentPhraseModel], + style_id: StyleId, + ) -> Result> { + let (_, phoneme_data_list) = initial_process(accent_phrases); + + let mut base_start_accent_list = vec![0]; + let mut base_end_accent_list = vec![0]; + let mut base_start_accent_phrase_list = vec![0]; + let mut base_end_accent_phrase_list = vec![0]; + for accent_phrase in accent_phrases { + let mut accent = usize::from(*accent_phrase.accent() != 1); + create_one_accent_list(&mut base_start_accent_list, accent_phrase, accent as i32); + + accent = *accent_phrase.accent() - 1; + create_one_accent_list(&mut base_end_accent_list, accent_phrase, accent as i32); + create_one_accent_list(&mut base_start_accent_phrase_list, accent_phrase, 0); + create_one_accent_list(&mut 
base_end_accent_phrase_list, accent_phrase, -1); + } + base_start_accent_list.push(0); + base_end_accent_list.push(0); + base_start_accent_phrase_list.push(0); + base_end_accent_phrase_list.push(0); + + let (consonant_phoneme_data_list, vowel_phoneme_data_list, vowel_indexes) = + split_mora(&phoneme_data_list); + + let consonant_phoneme_list: Vec = consonant_phoneme_data_list + .iter() + .map(|phoneme_data| phoneme_data.phoneme_id()) + .collect(); + let vowel_phoneme_list: Vec = vowel_phoneme_data_list + .iter() + .map(|phoneme_data| phoneme_data.phoneme_id()) + .collect(); + + let mut start_accent_list = Vec::with_capacity(vowel_indexes.len()); + let mut end_accent_list = Vec::with_capacity(vowel_indexes.len()); + let mut start_accent_phrase_list = Vec::with_capacity(vowel_indexes.len()); + let mut end_accent_phrase_list = Vec::with_capacity(vowel_indexes.len()); + + for vowel_index in vowel_indexes { + start_accent_list.push(base_start_accent_list[vowel_index as usize]); + end_accent_list.push(base_end_accent_list[vowel_index as usize]); + start_accent_phrase_list.push(base_start_accent_phrase_list[vowel_index as usize]); + end_accent_phrase_list.push(base_end_accent_phrase_list[vowel_index as usize]); + } + + let mut f0_list = self.predict_intonation( + vowel_phoneme_list.len(), + &vowel_phoneme_list, + &consonant_phoneme_list, + &start_accent_list, + &end_accent_list, + &start_accent_phrase_list, + &end_accent_phrase_list, + style_id, + )?; + + for i in 0..vowel_phoneme_data_list.len() { + const UNVOICED_MORA_PHONEME_LIST: &[&str] = &["A", "I", "U", "E", "O", "cl", "pau"]; + + if UNVOICED_MORA_PHONEME_LIST + .iter() + .any(|phoneme| *phoneme == vowel_phoneme_data_list[i].phoneme()) + { + f0_list[i] = 0.; } } - moras.clone() - } - fn make_interrogative_mora(last_mora: &MoraModel) -> MoraModel { - const FIX_VOWEL_LENGTH: f32 = 0.15; - const ADJUST_PITCH: f32 = 0.3; - const MAX_PITCH: f32 = 6.5; + let mut index = 0; + let new_accent_phrases = accent_phrases + .iter() + .map(|accent_phrase| { + AccentPhraseModel::new( + accent_phrase + .moras() + .iter() + .map(|mora| { + let new_mora = MoraModel::new( + mora.text().clone(), + mora.consonant().clone(), + *mora.consonant_length(), + mora.vowel().clone(), + *mora.vowel_length(), + f0_list[index + 1], + ); + index += 1; + new_mora + }) + .collect(), + *accent_phrase.accent(), + accent_phrase.pause_mora().as_ref().map(|pause_mora| { + let new_pause_mora = MoraModel::new( + pause_mora.text().clone(), + pause_mora.consonant().clone(), + *pause_mora.consonant_length(), + pause_mora.vowel().clone(), + *pause_mora.vowel_length(), + f0_list[index + 1], + ); + index += 1; + new_pause_mora + }), + *accent_phrase.is_interrogative(), + ) + }) + .collect(); - let pitch = (*last_mora.pitch() + ADJUST_PITCH).min(MAX_PITCH); + return Ok(new_accent_phrases); - MoraModel::new( - mora_to_text(last_mora.vowel()), - None, - None, - last_mora.vowel().clone(), - FIX_VOWEL_LENGTH, - pitch, - ) - } + fn create_one_accent_list( + accent_list: &mut Vec, + accent_phrase: &AccentPhraseModel, + point: i32, + ) { + let mut one_accent_list: Vec = Vec::new(); - fn to_wav(wave: &[f32], audio_query: &AudioQueryModel) -> Vec { - let volume_scale = *audio_query.volume_scale(); - let output_stereo = *audio_query.output_stereo(); - let output_sampling_rate = *audio_query.output_sampling_rate(); - - // TODO: 44.1kHzなどの対応 - - let num_channels: u16 = if output_stereo { 2 } else { 1 }; - let bit_depth: u16 = 16; - let repeat_count: u32 = - (output_sampling_rate / 
DEFAULT_SAMPLING_RATE) * num_channels as u32; - let block_size: u16 = bit_depth * num_channels / 8; - - let bytes_size = wave.len() as u32 * repeat_count * 2; - let wave_size = bytes_size + 44; - - let buf: Vec = Vec::with_capacity(wave_size as usize); - let mut cur = Cursor::new(buf); - - cur.write_all("RIFF".as_bytes()).unwrap(); - cur.write_all(&(wave_size - 8).to_le_bytes()).unwrap(); - cur.write_all("WAVEfmt ".as_bytes()).unwrap(); - cur.write_all(&16_u32.to_le_bytes()).unwrap(); // fmt header length - cur.write_all(&1_u16.to_le_bytes()).unwrap(); //linear PCM - cur.write_all(&num_channels.to_le_bytes()).unwrap(); - cur.write_all(&output_sampling_rate.to_le_bytes()).unwrap(); - - let block_rate = output_sampling_rate * block_size as u32; - - cur.write_all(&block_rate.to_le_bytes()).unwrap(); - cur.write_all(&block_size.to_le_bytes()).unwrap(); - cur.write_all(&bit_depth.to_le_bytes()).unwrap(); - cur.write_all("data".as_bytes()).unwrap(); - cur.write_all(&bytes_size.to_le_bytes()).unwrap(); - - for value in wave { - let v = (value * volume_scale).clamp(-1., 1.); - let data = (v * 0x7fff as f32) as i16; - for _ in 0..repeat_count { - cur.write_all(&data.to_le_bytes()).unwrap(); + for (i, mora) in accent_phrase.moras().iter().enumerate() { + let value = (i as i32 == point + || (point < 0 + && i == (accent_phrase.moras().len() as i32 + point) as usize)) + .into(); + one_accent_list.push(value); + if mora.consonant().is_some() { + one_accent_list.push(value); + } + } + if accent_phrase.pause_mora().is_some() { + one_accent_list.push(0); } + accent_list.extend(one_accent_list) } + } - cur.into_inner() + /// AquesTalk風記法から[AudioQuery]を生成する。 + /// + /// # Example + /// + #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 + #[cfg_attr(not(windows), doc = "```")] + /// # #[tokio::main] + /// # async fn main() -> anyhow::Result<()> { + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .await?; + /// # + /// use voicevox_core::StyleId; + /// + /// let audio_query = synthesizer + /// .audio_query_from_kana("コンニチワ'", StyleId::new(302)) + /// .await?; + /// # + /// # Ok(()) + /// # } + /// ``` + /// + /// [AudioQuery]: crate::AudioQueryModel + pub fn audio_query_from_kana( + &self, + kana: &str, + style_id: StyleId, + ) -> Result { + let accent_phrases = self.create_accent_phrases_from_kana(kana, style_id)?; + Ok(AudioQueryModel::from_accent_phrases(accent_phrases) + .with_kana(Some(kana.to_owned()))) } - } - /// AquesTalk風記法からAccentPhrase (アクセント句)の配列を生成する。 - /// - /// # Example - /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] - /// # #[tokio::main] - /// # async fn main() -> anyhow::Result<()> { - /// # let synthesizer = - /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .await?; - /// # - /// use voicevox_core::StyleId; - /// - /// let accent_phrases = synthesizer - /// .create_accent_phrases_from_kana("コンニチワ'", StyleId::new(302)) - /// .await?; - /// # - /// # Ok(()) - /// # } - /// ``` - pub fn create_accent_phrases_from_kana( - &self, - kana: &str, - style_id: StyleId, - ) -> Result> { - self.replace_mora_data(&parse_kana(kana)?, style_id) + /// AquesTalk風記法から音声合成を行う。 + pub fn tts_from_kana( + &self, + kana: &str, + style_id: StyleId, + 
options: &TtsOptions, + ) -> Result> { + let audio_query = &self.audio_query_from_kana(kana, style_id)?; + self.synthesis(audio_query, style_id, &SynthesisOptions::from(options)) + } } -} -impl self::blocking::Synthesizer { - /// 日本語のテキストからAccentPhrase (アクセント句)の配列を生成する。 - /// - /// # Example - /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] - /// # #[tokio::main] - /// # async fn main() -> anyhow::Result<()> { - /// # let synthesizer = - /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .await?; - /// # - /// use voicevox_core::StyleId; - /// - /// let accent_phrases = synthesizer - /// .create_accent_phrases("こんにちは", StyleId::new(302)) - /// .await?; - /// # - /// # Ok(()) - /// # } - /// ``` - pub fn create_accent_phrases( - &self, - text: &str, - style_id: StyleId, - ) -> Result> { - if text.is_empty() { - return Ok(Vec::new()); - } + impl self::Synthesizer { + /// 日本語のテキストからAccentPhrase (アクセント句)の配列を生成する。 + /// + /// # Example + /// + #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 + #[cfg_attr(not(windows), doc = "```")] + /// # #[tokio::main] + /// # async fn main() -> anyhow::Result<()> { + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .await?; + /// # + /// use voicevox_core::StyleId; + /// + /// let accent_phrases = synthesizer + /// .create_accent_phrases("こんにちは", StyleId::new(302)) + /// .await?; + /// # + /// # Ok(()) + /// # } + /// ``` + pub fn create_accent_phrases( + &self, + text: &str, + style_id: StyleId, + ) -> Result> { + if text.is_empty() { + return Ok(Vec::new()); + } - let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?; + let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?; - let accent_phrases: Vec = utterance - .breath_groups() - .iter() - .enumerate() - .fold(Vec::new(), |mut accum_vec, (i, breath_group)| { - accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map( - |(j, accent_phrase)| { - let moras = accent_phrase - .moras() - .iter() - .map(|mora| { - let mora_text = mora - .phonemes() - .iter() - .map(|phoneme| phoneme.phoneme().to_string()) - .collect::>() - .join(""); - - let (consonant, consonant_length) = - if let Some(consonant) = mora.consonant() { - (Some(consonant.phoneme().to_string()), Some(0.)) - } else { - (None, None) - }; - - MoraModel::new( - mora_to_text(mora_text), - consonant, - consonant_length, - mora.vowel().phoneme().into(), + let accent_phrases: Vec = utterance + .breath_groups() + .iter() + .enumerate() + .fold(Vec::new(), |mut accum_vec, (i, breath_group)| { + accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map( + |(j, accent_phrase)| { + let moras = accent_phrase + .moras() + .iter() + .map(|mora| { + let mora_text = mora + .phonemes() + .iter() + .map(|phoneme| phoneme.phoneme().to_string()) + .collect::>() + .join(""); + + let (consonant, consonant_length) = + if let Some(consonant) = mora.consonant() { + (Some(consonant.phoneme().to_string()), Some(0.)) + } else { + (None, None) + }; + + MoraModel::new( + mora_to_text(mora_text), + consonant, + consonant_length, + mora.vowel().phoneme().into(), + 0., + 0., + ) + }) + .collect(); + + let pause_mora = if i != utterance.breath_groups().len() 
- 1 + && j == breath_group.accent_phrases().len() - 1 + { + Some(MoraModel::new( + "、".into(), + None, + None, + "pau".into(), 0., 0., - ) - }) - .collect(); - - let pause_mora = if i != utterance.breath_groups().len() - 1 - && j == breath_group.accent_phrases().len() - 1 - { - Some(MoraModel::new( - "、".into(), - None, - None, - "pau".into(), - 0., - 0., - )) - } else { - None - }; - - AccentPhraseModel::new( - moras, - *accent_phrase.accent(), - pause_mora, - *accent_phrase.is_interrogative(), - ) - }, - )); + )) + } else { + None + }; + + AccentPhraseModel::new( + moras, + *accent_phrase.accent(), + pause_mora, + *accent_phrase.is_interrogative(), + ) + }, + )); + + accum_vec + }); + + self.replace_mora_data(&accent_phrases, style_id) + } - accum_vec - }); + /// 日本語のテキストから[AudioQuery]を生成する。 + /// + /// # Examples + /// + #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 + #[cfg_attr(not(windows), doc = "```")] + /// # #[tokio::main] + /// # async fn main() -> anyhow::Result<()> { + /// # let synthesizer = + /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( + /// # test_util::OPEN_JTALK_DIC_DIR, + /// # ) + /// # .await?; + /// # + /// use voicevox_core::StyleId; + /// + /// let audio_query = synthesizer + /// .audio_query("こんにちは", StyleId::new(302)) + /// .await?; + /// # + /// # Ok(()) + /// # } + /// ``` + /// + /// [AudioQuery]: crate::AudioQueryModel + pub fn audio_query(&self, text: &str, style_id: StyleId) -> Result { + let accent_phrases = self.create_accent_phrases(text, style_id)?; + Ok(AudioQueryModel::from_accent_phrases(accent_phrases)) + } - self.replace_mora_data(&accent_phrases, style_id) + /// 日本語のテキストから音声合成を行う。 + pub fn tts(&self, text: &str, style_id: StyleId, options: &TtsOptions) -> Result> { + let audio_query = &self.audio_query(text, style_id)?; + self.synthesis(audio_query, style_id, &SynthesisOptions::from(options)) + } } -} -impl self::blocking::Synthesizer { - /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 - pub fn replace_mora_data( - &self, - accent_phrases: &[AccentPhraseModel], - style_id: StyleId, - ) -> Result> { - let accent_phrases = self.replace_phoneme_length(accent_phrases, style_id)?; - self.replace_mora_pitch(&accent_phrases, style_id) + pub trait PerformInference { + /// `predict_duration`を実行する。 + /// + /// # Performance + /// + /// CPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 + fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result>; + + /// `predict_intonation`を実行する。 + /// + /// # Performance + /// + /// CPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 + #[allow(clippy::too_many_arguments)] + fn predict_intonation( + &self, + length: usize, + vowel_phoneme_vector: &[i64], + consonant_phoneme_vector: &[i64], + start_accent_vector: &[i64], + end_accent_vector: &[i64], + start_accent_phrase_vector: &[i64], + end_accent_phrase_vector: &[i64], + style_id: StyleId, + ) -> Result>; + + /// `decode`を実行する。 + /// + /// # Performance + /// + /// CPU/GPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 + fn decode( + &self, + length: usize, + phoneme_size: usize, + f0: &[f32], + phoneme_vector: &[f32], + style_id: StyleId, + ) -> Result>; } - /// AccentPhraseの配列の音素長を、特定の声で生成しなおす。 - pub fn replace_phoneme_length( - &self, - accent_phrases: &[AccentPhraseModel], - style_id: StyleId, - ) -> Result> { - let (_, phoneme_data_list) = initial_process(accent_phrases); + impl PerformInference for self::Synthesizer { + fn predict_duration(&self, phoneme_vector: &[i64], style_id: 
StyleId) -> Result> { + // FIXME: `Status::ids_for`があるため、ここは不要なはず + if !self.status.validate_speaker_id(style_id) { + return Err(ErrorRepr::StyleNotFound { style_id }.into()); + } - let (_, _, vowel_indexes_data) = split_mora(&phoneme_data_list); + let (model_id, model_inner_id) = self.status.ids_for(style_id)?; + + let PredictDurationOutput { + phoneme_length: output, + } = self.status.run_session( + &model_id, + PredictDurationInput { + phoneme_list: ndarray::arr1(phoneme_vector), + speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]), + }, + )?; + let mut output = output.into_raw_vec(); + + for output_item in output.iter_mut() { + if *output_item < PHONEME_LENGTH_MINIMAL { + *output_item = PHONEME_LENGTH_MINIMAL; + } + } - let phoneme_list_s: Vec = phoneme_data_list - .iter() - .map(|phoneme_data| phoneme_data.phoneme_id()) - .collect(); - let phoneme_length = self.predict_duration(&phoneme_list_s, style_id)?; + return Ok(output); - let mut index = 0; - let new_accent_phrases = accent_phrases - .iter() - .map(|accent_phrase| { - AccentPhraseModel::new( - accent_phrase - .moras() - .iter() - .map(|mora| { - let new_mora = MoraModel::new( - mora.text().clone(), - mora.consonant().clone(), - mora.consonant().as_ref().map(|_| { - phoneme_length[vowel_indexes_data[index + 1] as usize - 1] - }), - mora.vowel().clone(), - phoneme_length[vowel_indexes_data[index + 1] as usize], - *mora.pitch(), - ); - index += 1; - new_mora - }) - .collect(), - *accent_phrase.accent(), - accent_phrase.pause_mora().as_ref().map(|pause_mora| { - let new_pause_mora = MoraModel::new( - pause_mora.text().clone(), - pause_mora.consonant().clone(), - *pause_mora.consonant_length(), - pause_mora.vowel().clone(), - phoneme_length[vowel_indexes_data[index + 1] as usize], - *pause_mora.pitch(), - ); - index += 1; - new_pause_mora - }), - *accent_phrase.is_interrogative(), - ) - }) - .collect(); + const PHONEME_LENGTH_MINIMAL: f32 = 0.01; + } - Ok(new_accent_phrases) - } + fn predict_intonation( + &self, + length: usize, + vowel_phoneme_vector: &[i64], + consonant_phoneme_vector: &[i64], + start_accent_vector: &[i64], + end_accent_vector: &[i64], + start_accent_phrase_vector: &[i64], + end_accent_phrase_vector: &[i64], + style_id: StyleId, + ) -> Result> { + // FIXME: `Status::ids_for`があるため、ここは不要なはず + if !self.status.validate_speaker_id(style_id) { + return Err(ErrorRepr::StyleNotFound { style_id }.into()); + } - /// AccentPhraseの配列の音高を、特定の声で生成しなおす。 - pub fn replace_mora_pitch( - &self, - accent_phrases: &[AccentPhraseModel], - style_id: StyleId, - ) -> Result> { - let (_, phoneme_data_list) = initial_process(accent_phrases); - - let mut base_start_accent_list = vec![0]; - let mut base_end_accent_list = vec![0]; - let mut base_start_accent_phrase_list = vec![0]; - let mut base_end_accent_phrase_list = vec![0]; - for accent_phrase in accent_phrases { - let mut accent = usize::from(*accent_phrase.accent() != 1); - create_one_accent_list(&mut base_start_accent_list, accent_phrase, accent as i32); - - accent = *accent_phrase.accent() - 1; - create_one_accent_list(&mut base_end_accent_list, accent_phrase, accent as i32); - create_one_accent_list(&mut base_start_accent_phrase_list, accent_phrase, 0); - create_one_accent_list(&mut base_end_accent_phrase_list, accent_phrase, -1); + let (model_id, model_inner_id) = self.status.ids_for(style_id)?; + + let PredictIntonationOutput { f0_list: output } = self.status.run_session( + &model_id, + PredictIntonationInput { + length: ndarray::arr0(length as i64), + vowel_phoneme_list: 
ndarray::arr1(vowel_phoneme_vector), + consonant_phoneme_list: ndarray::arr1(consonant_phoneme_vector), + start_accent_list: ndarray::arr1(start_accent_vector), + end_accent_list: ndarray::arr1(end_accent_vector), + start_accent_phrase_list: ndarray::arr1(start_accent_phrase_vector), + end_accent_phrase_list: ndarray::arr1(end_accent_phrase_vector), + speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]), + }, + )?; + + Ok(output.into_raw_vec()) } - base_start_accent_list.push(0); - base_end_accent_list.push(0); - base_start_accent_phrase_list.push(0); - base_end_accent_phrase_list.push(0); - let (consonant_phoneme_data_list, vowel_phoneme_data_list, vowel_indexes) = - split_mora(&phoneme_data_list); + fn decode( + &self, + length: usize, + phoneme_size: usize, + f0: &[f32], + phoneme_vector: &[f32], + style_id: StyleId, + ) -> Result> { + // FIXME: `Status::ids_for`があるため、ここは不要なはず + if !self.status.validate_speaker_id(style_id) { + return Err(ErrorRepr::StyleNotFound { style_id }.into()); + } - let consonant_phoneme_list: Vec = consonant_phoneme_data_list - .iter() - .map(|phoneme_data| phoneme_data.phoneme_id()) - .collect(); - let vowel_phoneme_list: Vec = vowel_phoneme_data_list - .iter() - .map(|phoneme_data| phoneme_data.phoneme_id()) - .collect(); + let (model_id, model_inner_id) = self.status.ids_for(style_id)?; + + // 音が途切れてしまうのを避けるworkaround処理が入っている + // TODO: 改善したらここのpadding処理を取り除く + const PADDING_SIZE: f64 = 0.4; + let padding_size = + ((PADDING_SIZE * DEFAULT_SAMPLING_RATE as f64) / 256.0).round() as usize; + let start_and_end_padding_size = 2 * padding_size; + let length_with_padding = length + start_and_end_padding_size; + let f0_with_padding = make_f0_with_padding(f0, length_with_padding, padding_size); + + let phoneme_with_padding = make_phoneme_with_padding( + phoneme_vector, + phoneme_size, + length_with_padding, + padding_size, + ); - let mut start_accent_list = Vec::with_capacity(vowel_indexes.len()); - let mut end_accent_list = Vec::with_capacity(vowel_indexes.len()); - let mut start_accent_phrase_list = Vec::with_capacity(vowel_indexes.len()); - let mut end_accent_phrase_list = Vec::with_capacity(vowel_indexes.len()); + let DecodeOutput { wave: output } = self.status.run_session( + &model_id, + DecodeInput { + f0: ndarray::arr1(&f0_with_padding) + .into_shape([length_with_padding, 1]) + .unwrap(), + phoneme: ndarray::arr1(&phoneme_with_padding) + .into_shape([length_with_padding, phoneme_size]) + .unwrap(), + speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]), + }, + )?; + + return Ok(trim_padding_from_output( + output.into_raw_vec(), + padding_size, + )); + + fn make_f0_with_padding( + f0_slice: &[f32], + length_with_padding: usize, + padding_size: usize, + ) -> Vec { + // 音が途切れてしまうのを避けるworkaround処理 + // 改善したらこの関数を削除する + let mut f0_with_padding = Vec::with_capacity(length_with_padding); + let padding = vec![0.0; padding_size]; + f0_with_padding.extend_from_slice(&padding); + f0_with_padding.extend_from_slice(f0_slice); + f0_with_padding.extend_from_slice(&padding); + f0_with_padding + } - for vowel_index in vowel_indexes { - start_accent_list.push(base_start_accent_list[vowel_index as usize]); - end_accent_list.push(base_end_accent_list[vowel_index as usize]); - start_accent_phrase_list.push(base_start_accent_phrase_list[vowel_index as usize]); - end_accent_phrase_list.push(base_end_accent_phrase_list[vowel_index as usize]); - } + fn make_phoneme_with_padding( + phoneme_slice: &[f32], + phoneme_size: usize, + length_with_padding: usize, + 
padding_size: usize, + ) -> Vec { + // 音が途切れてしまうのを避けるworkaround処理 + // 改善したらこの関数を削除する + let mut padding_phoneme = vec![0.0; phoneme_size]; + padding_phoneme[0] = 1.0; + let padding_phoneme_len = padding_phoneme.len(); + let padding_phonemes: Vec = padding_phoneme + .into_iter() + .cycle() + .take(padding_phoneme_len * padding_size) + .collect(); + let mut phoneme_with_padding = + Vec::with_capacity(phoneme_size * length_with_padding); + phoneme_with_padding.extend_from_slice(&padding_phonemes); + phoneme_with_padding.extend_from_slice(phoneme_slice); + phoneme_with_padding.extend_from_slice(&padding_phonemes); + + phoneme_with_padding + } - let mut f0_list = self.predict_intonation( - vowel_phoneme_list.len(), - &vowel_phoneme_list, - &consonant_phoneme_list, - &start_accent_list, - &end_accent_list, - &start_accent_phrase_list, - &end_accent_phrase_list, - style_id, - )?; - - for i in 0..vowel_phoneme_data_list.len() { - const UNVOICED_MORA_PHONEME_LIST: &[&str] = &["A", "I", "U", "E", "O", "cl", "pau"]; - - if UNVOICED_MORA_PHONEME_LIST - .iter() - .any(|phoneme| *phoneme == vowel_phoneme_data_list[i].phoneme()) - { - f0_list[i] = 0.; + fn trim_padding_from_output(mut output: Vec, padding_f0_size: usize) -> Vec { + // 音が途切れてしまうのを避けるworkaround処理 + // 改善したらこの関数を削除する + let padding_sampling_size = padding_f0_size * 256; + output + .drain(padding_sampling_size..output.len() - padding_sampling_size) + .collect() } } + } - let mut index = 0; - let new_accent_phrases = accent_phrases - .iter() - .map(|accent_phrase| { - AccentPhraseModel::new( - accent_phrase - .moras() - .iter() - .map(|mora| { - let new_mora = MoraModel::new( - mora.text().clone(), - mora.consonant().clone(), - *mora.consonant_length(), - mora.vowel().clone(), - *mora.vowel_length(), - f0_list[index + 1], - ); - index += 1; - new_mora - }) - .collect(), - *accent_phrase.accent(), - accent_phrase.pause_mora().as_ref().map(|pause_mora| { - let new_pause_mora = MoraModel::new( - pause_mora.text().clone(), - pause_mora.consonant().clone(), - *pause_mora.consonant_length(), - pause_mora.vowel().clone(), - *pause_mora.vowel_length(), - f0_list[index + 1], - ); - index += 1; - new_pause_mora - }), - *accent_phrase.is_interrogative(), - ) - }) - .collect(); + #[cfg(windows)] + fn list_windows_video_cards() { + use std::{ffi::OsString, os::windows::ffi::OsStringExt as _}; - return Ok(new_accent_phrases); - - fn create_one_accent_list( - accent_list: &mut Vec, - accent_phrase: &AccentPhraseModel, - point: i32, - ) { - let mut one_accent_list: Vec = Vec::new(); - - for (i, mora) in accent_phrase.moras().iter().enumerate() { - let value = (i as i32 == point - || (point < 0 && i == (accent_phrase.moras().len() as i32 + point) as usize)) - .into(); - one_accent_list.push(value); - if mora.consonant().is_some() { - one_accent_list.push(value); + use humansize::BINARY; + use tracing::{error, info}; + use windows::Win32::Graphics::Dxgi::{ + CreateDXGIFactory, IDXGIFactory, DXGI_ADAPTER_DESC, DXGI_ERROR_NOT_FOUND, + }; + + info!("検出されたGPU (DirectMLには1番目のGPUが使われます):"); + match list_windows_video_cards() { + Ok(descs) => { + for desc in descs { + let description = OsString::from_wide(trim_nul(&desc.Description)); + let vram = humansize::format_size(desc.DedicatedVideoMemory, BINARY); + info!(" - {description:?} ({vram})"); } } - if accent_phrase.pause_mora().is_some() { - one_accent_list.push(0); + Err(err) => error!("{err}"), + } + + fn list_windows_video_cards() -> windows::core::Result> { + #[allow(unsafe_code)] + unsafe { + let factory = 
CreateDXGIFactory::()?; + (0..) + .map(|i| factory.EnumAdapters(i)?.GetDesc()) + .take_while(|r| !matches!(r, Err(e) if e.code() == DXGI_ERROR_NOT_FOUND)) + .collect() } - accent_list.extend(one_accent_list) } - } - /// AquesTalk風記法から[AudioQuery]を生成する。 - /// - /// # Example - /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] - /// # #[tokio::main] - /// # async fn main() -> anyhow::Result<()> { - /// # let synthesizer = - /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .await?; - /// # - /// use voicevox_core::StyleId; - /// - /// let audio_query = synthesizer - /// .audio_query_from_kana("コンニチワ'", StyleId::new(302)) - /// .await?; - /// # - /// # Ok(()) - /// # } - /// ``` - /// - /// [AudioQuery]: crate::AudioQueryModel - pub fn audio_query_from_kana(&self, kana: &str, style_id: StyleId) -> Result { - let accent_phrases = self.create_accent_phrases_from_kana(kana, style_id)?; - Ok(AudioQueryModel::from_accent_phrases(accent_phrases).with_kana(Some(kana.to_owned()))) + fn trim_nul(s: &[u16]) -> &[u16] { + &s[..s.iter().position(|&c| c == 0x0000).unwrap_or(s.len())] + } } -} -impl self::blocking::Synthesizer { - /// 日本語のテキストから[AudioQuery]を生成する。 - /// - /// # Examples - /// - #[cfg_attr(windows, doc = "```no_run")] // https://github.com/VOICEVOX/voicevox_core/issues/537 - #[cfg_attr(not(windows), doc = "```")] - /// # #[tokio::main] - /// # async fn main() -> anyhow::Result<()> { - /// # let synthesizer = - /// # voicevox_core::__internal::doctest_fixtures::synthesizer_with_sample_voice_model( - /// # test_util::OPEN_JTALK_DIC_DIR, - /// # ) - /// # .await?; - /// # - /// use voicevox_core::StyleId; - /// - /// let audio_query = synthesizer - /// .audio_query("こんにちは", StyleId::new(302)) - /// .await?; - /// # - /// # Ok(()) - /// # } - /// ``` - /// - /// [AudioQuery]: crate::AudioQueryModel - pub fn audio_query(&self, text: &str, style_id: StyleId) -> Result { - let accent_phrases = self.create_accent_phrases(text, style_id)?; - Ok(AudioQueryModel::from_accent_phrases(accent_phrases)) - } -} + fn initial_process(accent_phrases: &[AccentPhraseModel]) -> (Vec, Vec) { + let flatten_moras = to_flatten_moras(accent_phrases); -impl self::blocking::Synthesizer { - /// AquesTalk風記法から音声合成を行う。 - pub fn tts_from_kana( - &self, - kana: &str, - style_id: StyleId, - options: &TtsOptions, - ) -> Result> { - let audio_query = &self.audio_query_from_kana(kana, style_id)?; - self.synthesis(audio_query, style_id, &SynthesisOptions::from(options)) - } -} + let mut phoneme_strings = vec!["pau".to_string()]; + for mora in flatten_moras.iter() { + if let Some(consonant) = mora.consonant() { + phoneme_strings.push(consonant.clone()) + } + phoneme_strings.push(mora.vowel().clone()); + } + phoneme_strings.push("pau".to_string()); -impl self::blocking::Synthesizer { - /// 日本語のテキストから音声合成を行う。 - pub fn tts(&self, text: &str, style_id: StyleId, options: &TtsOptions) -> Result> { - let audio_query = &self.audio_query(text, style_id)?; - self.synthesis(audio_query, style_id, &SynthesisOptions::from(options)) - } -} + let phoneme_data_list = to_phoneme_data_list(&phoneme_strings); -pub trait PerformInference { - /// `predict_duration`を実行する。 - /// - /// # Performance - /// - /// CPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 - fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result>; - - /// 
`predict_intonation`を実行する。 - /// - /// # Performance - /// - /// CPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 - #[allow(clippy::too_many_arguments)] - fn predict_intonation( - &self, - length: usize, - vowel_phoneme_vector: &[i64], - consonant_phoneme_vector: &[i64], - start_accent_vector: &[i64], - end_accent_vector: &[i64], - start_accent_phrase_vector: &[i64], - end_accent_phrase_vector: &[i64], - style_id: StyleId, - ) -> Result>; - - /// `decode`を実行する。 - /// - /// # Performance - /// - /// CPU/GPU-boundな操作であるため、非同期ランタイム上では直接実行されるべきではない。 - fn decode( - &self, - length: usize, - phoneme_size: usize, - f0: &[f32], - phoneme_vector: &[f32], - style_id: StyleId, - ) -> Result>; -} + return (flatten_moras, phoneme_data_list); + + fn to_flatten_moras(accent_phrases: &[AccentPhraseModel]) -> Vec { + let mut flatten_moras = Vec::new(); + + for accent_phrase in accent_phrases { + let moras = accent_phrase.moras(); + for mora in moras { + flatten_moras.push(mora.clone()); + } + if let Some(pause_mora) = accent_phrase.pause_mora() { + flatten_moras.push(pause_mora.clone()); + } + } -impl PerformInference for self::blocking::Synthesizer { - fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result> { - // FIXME: `Status::ids_for`があるため、ここは不要なはず - if !self.status.validate_speaker_id(style_id) { - return Err(ErrorRepr::StyleNotFound { style_id }.into()); + flatten_moras } - let (model_id, model_inner_id) = self.status.ids_for(style_id)?; + fn to_phoneme_data_list>(phoneme_str_list: &[T]) -> Vec { + OjtPhoneme::convert( + phoneme_str_list + .iter() + .enumerate() + .map(|(i, s)| OjtPhoneme::new(s.as_ref().to_string(), i as f32, i as f32 + 1.)) + .collect::>() + .as_slice(), + ) + } + } - let PredictDurationOutput { - phoneme_length: output, - } = self.status.run_session( - &model_id, - PredictDurationInput { - phoneme_list: ndarray::arr1(phoneme_vector), - speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]), - }, - )?; - let mut output = output.into_raw_vec(); + fn split_mora(phoneme_list: &[OjtPhoneme]) -> (Vec, Vec, Vec) { + let mut vowel_indexes = Vec::new(); + for (i, phoneme) in phoneme_list.iter().enumerate() { + const MORA_PHONEME_LIST: &[&str] = &[ + "a", "i", "u", "e", "o", "N", "A", "I", "U", "E", "O", "cl", "pau", + ]; - for output_item in output.iter_mut() { - if *output_item < PHONEME_LENGTH_MINIMAL { - *output_item = PHONEME_LENGTH_MINIMAL; + if MORA_PHONEME_LIST + .iter() + .any(|mora_phoneme| *mora_phoneme == phoneme.phoneme()) + { + vowel_indexes.push(i as i64); } } - return Ok(output); + let vowel_phoneme_list = vowel_indexes + .iter() + .map(|vowel_index| phoneme_list[*vowel_index as usize].clone()) + .collect(); + + let mut consonant_phoneme_list = vec![OjtPhoneme::default()]; + for i in 0..(vowel_indexes.len() - 1) { + let prev = vowel_indexes[i]; + let next = vowel_indexes[i + 1]; + if next - prev == 1 { + consonant_phoneme_list.push(OjtPhoneme::default()); + } else { + consonant_phoneme_list.push(phoneme_list[next as usize - 1].clone()); + } + } - const PHONEME_LENGTH_MINIMAL: f32 = 0.01; + (consonant_phoneme_list, vowel_phoneme_list, vowel_indexes) } - fn predict_intonation( - &self, - length: usize, - vowel_phoneme_vector: &[i64], - consonant_phoneme_vector: &[i64], - start_accent_vector: &[i64], - end_accent_vector: &[i64], - start_accent_phrase_vector: &[i64], - end_accent_phrase_vector: &[i64], - style_id: StyleId, - ) -> Result> { - // FIXME: `Status::ids_for`があるため、ここは不要なはず - if !self.status.validate_speaker_id(style_id) { - return 
Err(ErrorRepr::StyleNotFound { style_id }.into()); + fn mora_to_text(mora: impl AsRef) -> String { + let last_char = mora.as_ref().chars().last().unwrap(); + let mora = if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) { + format!( + "{}{}", + &mora.as_ref()[0..mora.as_ref().len() - 1], + last_char.to_lowercase() + ) + } else { + mora.as_ref().to_string() + }; + // もしカタカナに変換できなければ、引数で与えた文字列がそのまま返ってくる + engine::mora2text(&mora).to_string() + } + + impl AudioQueryModel { + fn from_accent_phrases(accent_phrases: Vec) -> Self { + let kana = create_kana(&accent_phrases); + Self::new( + accent_phrases, + 1., + 0., + 1., + 1., + 0.1, + 0.1, + DEFAULT_SAMPLING_RATE, + false, + Some(kana), + ) } + } +} - let (model_id, model_inner_id) = self.status.ids_for(style_id)?; - - let PredictIntonationOutput { f0_list: output } = self.status.run_session( - &model_id, - PredictIntonationInput { - length: ndarray::arr0(length as i64), - vowel_phoneme_list: ndarray::arr1(vowel_phoneme_vector), - consonant_phoneme_list: ndarray::arr1(consonant_phoneme_vector), - start_accent_list: ndarray::arr1(start_accent_vector), - end_accent_list: ndarray::arr1(end_accent_vector), - start_accent_phrase_list: ndarray::arr1(start_accent_phrase_vector), - end_accent_phrase_list: ndarray::arr1(end_accent_phrase_vector), - speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]), - }, - )?; +pub(crate) mod tokio { + use std::sync::Arc; - Ok(output.into_raw_vec()) - } + use crate::{ + AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId, + SynthesisOptions, VoiceModelId, VoiceModelMeta, + }; - fn decode( - &self, - length: usize, - phoneme_size: usize, - f0: &[f32], - phoneme_vector: &[f32], - style_id: StyleId, - ) -> Result> { - // FIXME: `Status::ids_for`があるため、ここは不要なはず - if !self.status.validate_speaker_id(style_id) { - return Err(ErrorRepr::StyleNotFound { style_id }.into()); - } + use super::{InitializeOptions, TtsOptions}; - let (model_id, model_inner_id) = self.status.ids_for(style_id)?; - - // 音が途切れてしまうのを避けるworkaround処理が入っている - // TODO: 改善したらここのpadding処理を取り除く - const PADDING_SIZE: f64 = 0.4; - let padding_size = ((PADDING_SIZE * DEFAULT_SAMPLING_RATE as f64) / 256.0).round() as usize; - let start_and_end_padding_size = 2 * padding_size; - let length_with_padding = length + start_and_end_padding_size; - let f0_with_padding = make_f0_with_padding(f0, length_with_padding, padding_size); - - let phoneme_with_padding = make_phoneme_with_padding( - phoneme_vector, - phoneme_size, - length_with_padding, - padding_size, - ); + /// 音声シンセサイザ。 + #[derive(Clone)] + pub struct Synthesizer(pub(super) Arc>); - let DecodeOutput { wave: output } = self.status.run_session( - &model_id, - DecodeInput { - f0: ndarray::arr1(&f0_with_padding) - .into_shape([length_with_padding, 1]) - .unwrap(), - phoneme: ndarray::arr1(&phoneme_with_padding) - .into_shape([length_with_padding, phoneme_size]) - .unwrap(), - speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]), - }, - )?; - - return Ok(trim_padding_from_output( - output.into_raw_vec(), - padding_size, - )); - - fn make_f0_with_padding( - f0_slice: &[f32], - length_with_padding: usize, - padding_size: usize, - ) -> Vec { - // 音が途切れてしまうのを避けるworkaround処理 - // 改善したらこの関数を削除する - let mut f0_with_padding = Vec::with_capacity(length_with_padding); - let padding = vec![0.0; padding_size]; - f0_with_padding.extend_from_slice(&padding); - f0_with_padding.extend_from_slice(f0_slice); - f0_with_padding.extend_from_slice(&padding); - f0_with_padding + // FIXME: docを書く + impl 
self::Synthesizer { + pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { + super::blocking::Synthesizer::new(open_jtalk, options) + .map(Into::into) + .map(Self) } - fn make_phoneme_with_padding( - phoneme_slice: &[f32], - phoneme_size: usize, - length_with_padding: usize, - padding_size: usize, - ) -> Vec { - // 音が途切れてしまうのを避けるworkaround処理 - // 改善したらこの関数を削除する - let mut padding_phoneme = vec![0.0; phoneme_size]; - padding_phoneme[0] = 1.0; - let padding_phoneme_len = padding_phoneme.len(); - let padding_phonemes: Vec = padding_phoneme - .into_iter() - .cycle() - .take(padding_phoneme_len * padding_size) - .collect(); - let mut phoneme_with_padding = Vec::with_capacity(phoneme_size * length_with_padding); - phoneme_with_padding.extend_from_slice(&padding_phonemes); - phoneme_with_padding.extend_from_slice(phoneme_slice); - phoneme_with_padding.extend_from_slice(&padding_phonemes); - - phoneme_with_padding + pub fn is_gpu_mode(&self) -> bool { + self.0.is_gpu_mode() } - fn trim_padding_from_output(mut output: Vec, padding_f0_size: usize) -> Vec { - // 音が途切れてしまうのを避けるworkaround処理 - // 改善したらこの関数を削除する - let padding_sampling_size = padding_f0_size * 256; - output - .drain(padding_sampling_size..output.len() - padding_sampling_size) - .collect() + pub async fn load_voice_model(&self, model: &crate::tokio::VoiceModel) -> Result<()> { + let model_bytes = &model.read_inference_models().await?; + self.0.status.insert_model(model.header(), model_bytes) } - } -} -#[cfg(windows)] -fn list_windows_video_cards() { - use std::{ffi::OsString, os::windows::ffi::OsStringExt as _}; + pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { + self.0.unload_voice_model(voice_model_id) + } - use humansize::BINARY; - use tracing::{error, info}; - use windows::Win32::Graphics::Dxgi::{ - CreateDXGIFactory, IDXGIFactory, DXGI_ADAPTER_DESC, DXGI_ERROR_NOT_FOUND, - }; + pub fn is_loaded_voice_model(&self, voice_model_id: &VoiceModelId) -> bool { + self.0.is_loaded_voice_model(voice_model_id) + } - info!("検出されたGPU (DirectMLには1番目のGPUが使われます):"); - match list_windows_video_cards() { - Ok(descs) => { - for desc in descs { - let description = OsString::from_wide(trim_nul(&desc.Description)); - let vram = humansize::format_size(desc.DedicatedVideoMemory, BINARY); - info!(" - {description:?} ({vram})"); - } + #[doc(hidden)] + pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool { + self.0.is_loaded_model_by_style_id(style_id) } - Err(err) => error!("{err}"), - } - fn list_windows_video_cards() -> windows::core::Result> { - #[allow(unsafe_code)] - unsafe { - let factory = CreateDXGIFactory::()?; - (0..) 
- .map(|i| factory.EnumAdapters(i)?.GetDesc()) - .take_while(|r| !matches!(r, Err(e) if e.code() == DXGI_ERROR_NOT_FOUND)) - .collect() + pub fn metas(&self) -> VoiceModelMeta { + self.0.metas() } - } - fn trim_nul(s: &[u16]) -> &[u16] { - &s[..s.iter().position(|&c| c == 0x0000).unwrap_or(s.len())] - } -} + pub async fn synthesis( + &self, + audio_query: &AudioQueryModel, + style_id: StyleId, + options: &SynthesisOptions, + ) -> Result> { + let blocking = self.0.clone(); + let audio_query = audio_query.clone(); + let options = options.clone(); + + crate::task::asyncify(move || blocking.synthesis(&audio_query, style_id, &options)) + .await + } -fn initial_process(accent_phrases: &[AccentPhraseModel]) -> (Vec, Vec) { - let flatten_moras = to_flatten_moras(accent_phrases); + pub async fn create_accent_phrases_from_kana( + &self, + kana: &str, + style_id: StyleId, + ) -> Result> { + let blocking = self.0.clone(); + let kana = kana.to_owned(); - let mut phoneme_strings = vec!["pau".to_string()]; - for mora in flatten_moras.iter() { - if let Some(consonant) = mora.consonant() { - phoneme_strings.push(consonant.clone()) + crate::task::asyncify(move || blocking.create_accent_phrases_from_kana(&kana, style_id)) + .await } - phoneme_strings.push(mora.vowel().clone()); - } - phoneme_strings.push("pau".to_string()); - let phoneme_data_list = to_phoneme_data_list(&phoneme_strings); + pub async fn replace_mora_data( + &self, + accent_phrases: &[AccentPhraseModel], + style_id: StyleId, + ) -> Result> { + let blocking = self.0.clone(); + let accent_phrases = accent_phrases.to_owned(); - return (flatten_moras, phoneme_data_list); + crate::task::asyncify(move || blocking.replace_mora_data(&accent_phrases, style_id)) + .await + } - fn to_flatten_moras(accent_phrases: &[AccentPhraseModel]) -> Vec { - let mut flatten_moras = Vec::new(); + pub async fn replace_phoneme_length( + &self, + accent_phrases: &[AccentPhraseModel], + style_id: StyleId, + ) -> Result> { + let blocking = self.0.clone(); + let accent_phrases = accent_phrases.to_owned(); - for accent_phrase in accent_phrases { - let moras = accent_phrase.moras(); - for mora in moras { - flatten_moras.push(mora.clone()); - } - if let Some(pause_mora) = accent_phrase.pause_mora() { - flatten_moras.push(pause_mora.clone()); - } + crate::task::asyncify(move || { + blocking.replace_phoneme_length(&accent_phrases, style_id) + }) + .await } - flatten_moras - } + pub async fn replace_mora_pitch( + &self, + accent_phrases: &[AccentPhraseModel], + style_id: StyleId, + ) -> Result> { + let blocking = self.0.clone(); + let accent_phrases = accent_phrases.to_owned(); - fn to_phoneme_data_list>(phoneme_str_list: &[T]) -> Vec { - OjtPhoneme::convert( - phoneme_str_list - .iter() - .enumerate() - .map(|(i, s)| OjtPhoneme::new(s.as_ref().to_string(), i as f32, i as f32 + 1.)) - .collect::>() - .as_slice(), - ) - } -} + crate::task::asyncify(move || blocking.replace_mora_pitch(&accent_phrases, style_id)) + .await + } -fn split_mora(phoneme_list: &[OjtPhoneme]) -> (Vec, Vec, Vec) { - let mut vowel_indexes = Vec::new(); - for (i, phoneme) in phoneme_list.iter().enumerate() { - const MORA_PHONEME_LIST: &[&str] = &[ - "a", "i", "u", "e", "o", "N", "A", "I", "U", "E", "O", "cl", "pau", - ]; + pub async fn audio_query_from_kana( + &self, + kana: &str, + style_id: StyleId, + ) -> Result { + let blocking = self.0.clone(); + let kana = kana.to_owned(); - if MORA_PHONEME_LIST - .iter() - .any(|mora_phoneme| *mora_phoneme == phoneme.phoneme()) - { - vowel_indexes.push(i as i64); + 
crate::task::asyncify(move || blocking.audio_query_from_kana(&kana, style_id)).await } - } - let vowel_phoneme_list = vowel_indexes - .iter() - .map(|vowel_index| phoneme_list[*vowel_index as usize].clone()) - .collect(); - - let mut consonant_phoneme_list = vec![OjtPhoneme::default()]; - for i in 0..(vowel_indexes.len() - 1) { - let prev = vowel_indexes[i]; - let next = vowel_indexes[i + 1]; - if next - prev == 1 { - consonant_phoneme_list.push(OjtPhoneme::default()); - } else { - consonant_phoneme_list.push(phoneme_list[next as usize - 1].clone()); + pub async fn tts_from_kana( + &self, + kana: &str, + style_id: StyleId, + options: &TtsOptions, + ) -> Result> { + let blocking = self.0.clone(); + let kana = kana.to_owned(); + let options = options.clone(); + + crate::task::asyncify(move || blocking.tts_from_kana(&kana, style_id, &options)).await } } - (consonant_phoneme_list, vowel_phoneme_list, vowel_indexes) -} + impl self::Synthesizer { + pub async fn create_accent_phrases( + &self, + text: &str, + style_id: StyleId, + ) -> Result> { + let blocking = self.0.clone(); + let text = text.to_owned(); -fn mora_to_text(mora: impl AsRef) -> String { - let last_char = mora.as_ref().chars().last().unwrap(); - let mora = if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) { - format!( - "{}{}", - &mora.as_ref()[0..mora.as_ref().len() - 1], - last_char.to_lowercase() - ) - } else { - mora.as_ref().to_string() - }; - // もしカタカナに変換できなければ、引数で与えた文字列がそのまま返ってくる - engine::mora2text(&mora).to_string() -} - -impl AudioQueryModel { - fn from_accent_phrases(accent_phrases: Vec) -> Self { - let kana = create_kana(&accent_phrases); - Self::new( - accent_phrases, - 1., - 0., - 1., - 1., - 0.1, - 0.1, - DEFAULT_SAMPLING_RATE, - false, - Some(kana), - ) - } -} + crate::task::asyncify(move || blocking.create_accent_phrases(&text, style_id)).await + } -pub(crate) mod blocking { - use crate::infer::{domain::InferenceDomainImpl, status::Status}; + pub async fn audio_query(&self, text: &str, style_id: StyleId) -> Result { + let blocking = self.0.clone(); + let text = text.to_owned(); - use super::InferenceRuntimeImpl; + crate::task::asyncify(move || blocking.audio_query(&text, style_id)).await + } - /// 音声シンセサイザ。 - pub struct Synthesizer { - pub(super) status: Status, - pub(super) open_jtalk: O, - pub(super) use_gpu: bool, + pub async fn tts( + &self, + text: &str, + style_id: StyleId, + options: &TtsOptions, + ) -> Result> { + let blocking = self.0.clone(); + let text = text.to_owned(); + let options = options.clone(); + + crate::task::asyncify(move || blocking.tts(&text, style_id, &options)).await + } } } -pub(crate) mod tokio { - use std::sync::Arc; - - /// 音声シンセサイザ。 - #[derive(Clone)] - pub struct Synthesizer(pub(super) Arc>); -} - #[cfg(test)] mod tests { - use super::{AccelerationMode, InitializeOptions, PerformInference as _}; + use super::{blocking::PerformInference as _, AccelerationMode, InitializeOptions}; use crate::{ engine::MoraModel, macros::tests::assert_debug_fmt_eq, test_util::open_default_vvm_file, AccentPhraseModel, Result, StyleId, diff --git a/crates/voicevox_core/src/user_dict/dict.rs b/crates/voicevox_core/src/user_dict/dict.rs index 5743b678f..6997620f0 100644 --- a/crates/voicevox_core/src/user_dict/dict.rs +++ b/crates/voicevox_core/src/user_dict/dict.rs @@ -1,174 +1,178 @@ -use indexmap::IndexMap; -use itertools::join; -use uuid::Uuid; +pub(crate) mod blocking { + use indexmap::IndexMap; + use itertools::join; + use uuid::Uuid; -use crate::{error::ErrorRepr, Result, UserDictWord}; + use 
crate::{error::ErrorRepr, Result}; -impl self::blocking::UserDict { - /// ユーザー辞書を作成する。 - pub fn new() -> Self { - Default::default() - } + use super::super::word::UserDictWord; - pub fn to_json(&self) -> String { - serde_json::to_string(&*self.words.lock().unwrap()).expect("should not fail") + /// ユーザー辞書。 + /// + /// 単語はJSONとの相互変換のために挿入された順序を保つ。 + #[derive(Debug, Default)] + pub struct UserDict { + words: std::sync::Mutex>, } - pub fn with_words(&self, f: impl FnOnce(&IndexMap) -> R) -> R { - f(&self.words.lock().unwrap()) - } + impl self::UserDict { + /// ユーザー辞書を作成する。 + pub fn new() -> Self { + Default::default() + } - /// ユーザー辞書をファイルから読み込む。 - /// - /// # Errors - /// - /// ファイルが読めなかった、または内容が不正だった場合はエラーを返す。 - pub fn load(&self, store_path: &str) -> Result<()> { - let words = (|| { - let words = &fs_err::read(store_path)?; - let words = serde_json::from_slice::>(words)?; - Ok(words) - })() - .map_err(ErrorRepr::LoadUserDict)?; - - self.words.lock().unwrap().extend(words); - Ok(()) - } + pub fn to_json(&self) -> String { + serde_json::to_string(&*self.words.lock().unwrap()).expect("should not fail") + } - /// ユーザー辞書に単語を追加する。 - pub fn add_word(&self, word: UserDictWord) -> Result { - let word_uuid = Uuid::new_v4(); - self.words.lock().unwrap().insert(word_uuid, word); - Ok(word_uuid) - } + pub fn with_words(&self, f: impl FnOnce(&IndexMap) -> R) -> R { + f(&self.words.lock().unwrap()) + } - /// ユーザー辞書の単語を変更する。 - pub fn update_word(&self, word_uuid: Uuid, new_word: UserDictWord) -> Result<()> { - let mut words = self.words.lock().unwrap(); - if !words.contains_key(&word_uuid) { - return Err(ErrorRepr::WordNotFound(word_uuid).into()); + /// ユーザー辞書をファイルから読み込む。 + /// + /// # Errors + /// + /// ファイルが読めなかった、または内容が不正だった場合はエラーを返す。 + pub fn load(&self, store_path: &str) -> Result<()> { + let words = (|| { + let words = &fs_err::read(store_path)?; + let words = serde_json::from_slice::>(words)?; + Ok(words) + })() + .map_err(ErrorRepr::LoadUserDict)?; + + self.words.lock().unwrap().extend(words); + Ok(()) } - words.insert(word_uuid, new_word); - Ok(()) - } - /// ユーザー辞書から単語を削除する。 - pub fn remove_word(&self, word_uuid: Uuid) -> Result { - let Some(word) = self.words.lock().unwrap().remove(&word_uuid) else { - return Err(ErrorRepr::WordNotFound(word_uuid).into()); - }; - Ok(word) - } + /// ユーザー辞書に単語を追加する。 + pub fn add_word(&self, word: UserDictWord) -> Result { + let word_uuid = Uuid::new_v4(); + self.words.lock().unwrap().insert(word_uuid, word); + Ok(word_uuid) + } - /// 他のユーザー辞書をインポートする。 - pub fn import(&self, other: &Self) -> Result<()> { - for (word_uuid, word) in &*other.words.lock().unwrap() { - self.words.lock().unwrap().insert(*word_uuid, word.clone()); + /// ユーザー辞書の単語を変更する。 + pub fn update_word(&self, word_uuid: Uuid, new_word: UserDictWord) -> Result<()> { + let mut words = self.words.lock().unwrap(); + if !words.contains_key(&word_uuid) { + return Err(ErrorRepr::WordNotFound(word_uuid).into()); + } + words.insert(word_uuid, new_word); + Ok(()) } - Ok(()) - } - /// ユーザー辞書を保存する。 - pub fn save(&self, store_path: &str) -> Result<()> { - fs_err::write( - store_path, - serde_json::to_vec(&self.words).expect("should not fail"), - ) - .map_err(|e| ErrorRepr::SaveUserDict(e.into()).into()) - } + /// ユーザー辞書から単語を削除する。 + pub fn remove_word(&self, word_uuid: Uuid) -> Result { + let Some(word) = self.words.lock().unwrap().remove(&word_uuid) else { + return Err(ErrorRepr::WordNotFound(word_uuid).into()); + }; + Ok(word) + } + + /// 他のユーザー辞書をインポートする。 + pub fn import(&self, other: &Self) -> Result<()> { + for 
(word_uuid, word) in &*other.words.lock().unwrap() { + self.words.lock().unwrap().insert(*word_uuid, word.clone()); + } + Ok(()) + } + + /// ユーザー辞書を保存する。 + pub fn save(&self, store_path: &str) -> Result<()> { + fs_err::write( + store_path, + serde_json::to_vec(&self.words).expect("should not fail"), + ) + .map_err(|e| ErrorRepr::SaveUserDict(e.into()).into()) + } - /// MeCabで使用する形式に変換する。 - pub(crate) fn to_mecab_format(&self) -> String { - join( - self.words - .lock() - .unwrap() - .values() - .map(UserDictWord::to_mecab_format), - "\n", - ) + /// MeCabで使用する形式に変換する。 + pub(crate) fn to_mecab_format(&self) -> String { + join( + self.words + .lock() + .unwrap() + .values() + .map(UserDictWord::to_mecab_format), + "\n", + ) + } } } -impl self::tokio::UserDict { - /// ユーザー辞書を作成する。 - pub fn new() -> Self { - Self(self::blocking::UserDict::new().into()) - } +pub(crate) mod tokio { + use std::sync::Arc; - pub fn to_json(&self) -> String { - self.0.to_json() - } + use indexmap::IndexMap; + use uuid::Uuid; - pub fn with_words(&self, f: impl FnOnce(&IndexMap) -> R) -> R { - self.0.with_words(f) - } + use crate::Result; - /// ユーザー辞書をファイルから読み込む。 - /// - /// # Errors - /// - /// ファイルが読めなかった、または内容が不正だった場合はエラーを返す。 - pub async fn load(&self, store_path: &str) -> Result<()> { - let blocking = self.0.clone(); - let store_path = store_path.to_owned(); - crate::task::asyncify(move || blocking.load(&store_path)).await - } + use super::super::word::UserDictWord; - /// ユーザー辞書に単語を追加する。 - pub fn add_word(&self, word: UserDictWord) -> Result { - self.0.add_word(word) - } + /// ユーザー辞書。 + /// + /// 単語はJSONとの相互変換のために挿入された順序を保つ。 + #[derive(Debug, Default)] + pub struct UserDict(Arc); - /// ユーザー辞書の単語を変更する。 - pub fn update_word(&self, word_uuid: Uuid, new_word: UserDictWord) -> Result<()> { - self.0.update_word(word_uuid, new_word) - } + impl self::UserDict { + /// ユーザー辞書を作成する。 + pub fn new() -> Self { + Self(super::blocking::UserDict::new().into()) + } - /// ユーザー辞書から単語を削除する。 - pub fn remove_word(&self, word_uuid: Uuid) -> Result { - self.0.remove_word(word_uuid) - } + pub fn to_json(&self) -> String { + self.0.to_json() + } - /// 他のユーザー辞書をインポートする。 - pub fn import(&self, other: &Self) -> Result<()> { - self.0.import(&other.0) - } + pub fn with_words(&self, f: impl FnOnce(&IndexMap) -> R) -> R { + self.0.with_words(f) + } - /// ユーザー辞書を保存する。 - pub async fn save(&self, store_path: &str) -> Result<()> { - let blocking = self.0.clone(); - let store_path = store_path.to_owned(); - crate::task::asyncify(move || blocking.save(&store_path)).await - } + /// ユーザー辞書をファイルから読み込む。 + /// + /// # Errors + /// + /// ファイルが読めなかった、または内容が不正だった場合はエラーを返す。 + pub async fn load(&self, store_path: &str) -> Result<()> { + let blocking = self.0.clone(); + let store_path = store_path.to_owned(); + crate::task::asyncify(move || blocking.load(&store_path)).await + } - /// MeCabで使用する形式に変換する。 - pub(crate) fn to_mecab_format(&self) -> String { - self.0.to_mecab_format() - } -} + /// ユーザー辞書に単語を追加する。 + pub fn add_word(&self, word: UserDictWord) -> Result { + self.0.add_word(word) + } -pub(crate) mod blocking { - use indexmap::IndexMap; - use uuid::Uuid; + /// ユーザー辞書の単語を変更する。 + pub fn update_word(&self, word_uuid: Uuid, new_word: UserDictWord) -> Result<()> { + self.0.update_word(word_uuid, new_word) + } - use super::UserDictWord; + /// ユーザー辞書から単語を削除する。 + pub fn remove_word(&self, word_uuid: Uuid) -> Result { + self.0.remove_word(word_uuid) + } - /// ユーザー辞書。 - /// - /// 単語はJSONとの相互変換のために挿入された順序を保つ。 - #[derive(Debug, Default)] - pub struct UserDict { - 
pub(super) words: std::sync::Mutex>, - } -} + /// 他のユーザー辞書をインポートする。 + pub fn import(&self, other: &Self) -> Result<()> { + self.0.import(&other.0) + } -pub(crate) mod tokio { - use std::sync::Arc; + /// ユーザー辞書を保存する。 + pub async fn save(&self, store_path: &str) -> Result<()> { + let blocking = self.0.clone(); + let store_path = store_path.to_owned(); + crate::task::asyncify(move || blocking.save(&store_path)).await + } - /// ユーザー辞書。 - /// - /// 単語はJSONとの相互変換のために挿入された順序を保つ。 - #[derive(Debug, Default)] - pub struct UserDict(pub(super) Arc); + /// MeCabで使用する形式に変換する。 + pub(crate) fn to_mecab_format(&self) -> String { + self.0.to_mecab_format() + } + } } diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index e0a080bfc..96bf481d1 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -1,23 +1,12 @@ use derive_getters::Getters; use derive_new::new; -use enum_map::EnumMap; -use futures::future::join3; -use nanoid::nanoid; -use ouroboros::self_referencing; -use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _}; -use serde::{de::DeserializeOwned, Deserialize}; +use serde::Deserialize; use crate::{ - error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, - infer::domain::InferenceOperationImpl, manifest::{Manifest, ModelInnerId}, - Result, SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta, -}; -use std::{ - collections::{BTreeMap, HashMap}, - io::{self, Cursor}, - path::{Path, PathBuf}, + SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta, }; +use std::{collections::BTreeMap, path::PathBuf}; /// [`VoiceModelId`]の実体。 /// @@ -32,234 +21,6 @@ pub struct VoiceModelId { raw_voice_model_id: RawVoiceModelId, } -impl self::blocking::VoiceModel { - pub(crate) fn read_inference_models( - &self, - ) -> LoadModelResult>> { - let reader = BlockingVvmEntryReader::open(&self.header.path)?; - - let model_bytes = [ - self.header.manifest.predict_duration_filename(), - self.header.manifest.predict_intonation_filename(), - self.header.manifest.decode_filename(), - ] - .into_par_iter() - .map(|filename| reader.read_vvm_entry(filename)) - .collect::, _>>()? 
- .try_into() - .unwrap_or_else(|_| panic!("should be same length")); - - Ok(EnumMap::from_array(model_bytes)) - } - - /// VVMファイルから`VoiceModel`をコンストラクトする。 - pub fn from_path(path: impl AsRef) -> crate::Result { - let path = path.as_ref().to_owned(); - let reader = BlockingVvmEntryReader::open(&path)?; - let manifest = reader.read_vvm_json::("manifest.json")?; - let metas = reader.read_vvm_json(manifest.metas_filename())?; - let id = VoiceModelId::new(nanoid!()); - - Ok(Self { - header: VoiceModelHeader { - id, - metas, - manifest, - path, - }, - }) - } - - /// ID。 - pub fn id(&self) -> &VoiceModelId { - &self.header.id - } - - /// メタ情報。 - pub fn metas(&self) -> &VoiceModelMeta { - &self.header.metas - } - - pub(crate) fn header(&self) -> &VoiceModelHeader { - &self.header - } -} - -#[self_referencing] -struct BlockingVvmEntryReader { - path: PathBuf, - zip: Vec, - #[covariant] - #[borrows(zip)] - reader: zip::ZipArchive>, -} - -impl BlockingVvmEntryReader { - fn open(path: &Path) -> LoadModelResult { - (|| { - let zip = std::fs::read(path)?; - Self::try_new(path.to_owned(), zip, |zip| { - zip::ZipArchive::new(Cursor::new(zip)) - }) - })() - .map_err(|source| LoadModelError { - path: path.to_owned(), - context: LoadModelErrorKind::OpenZipFile, - source: Some(source.into()), - }) - } - - fn read_vvm_json(&self, filename: &str) -> LoadModelResult { - let bytes = &self.read_vvm_entry(filename)?; - serde_json::from_slice(bytes).map_err(|source| LoadModelError { - path: self.borrow_path().clone(), - context: LoadModelErrorKind::OpenZipFile, - source: Some(source.into()), - }) - } - - fn read_vvm_entry(&self, filename: &str) -> LoadModelResult> { - (|| { - let mut reader = self.borrow_reader().clone(); - let mut entry = reader.by_name(filename)?; - let mut buf = Vec::with_capacity(entry.size() as _); - io::copy(&mut entry, &mut buf)?; - Ok(buf) - })() - .map_err(|source| LoadModelError { - path: self.borrow_path().clone(), - context: LoadModelErrorKind::OpenZipFile, - source: Some(source), - }) - } -} - -impl self::tokio::VoiceModel { - pub(crate) async fn read_inference_models( - &self, - ) -> LoadModelResult>> { - let reader = AsyncVvmEntryReader::open(&self.header.path).await?; - let (decode_model_result, predict_duration_model_result, predict_intonation_model_result) = - join3( - reader.read_vvm_entry(self.header.manifest.decode_filename()), - reader.read_vvm_entry(self.header.manifest.predict_duration_filename()), - reader.read_vvm_entry(self.header.manifest.predict_intonation_filename()), - ) - .await; - - Ok(EnumMap::from_array([ - predict_duration_model_result?, - predict_intonation_model_result?, - decode_model_result?, - ])) - } - /// VVMファイルから`VoiceModel`をコンストラクトする。 - pub async fn from_path(path: impl AsRef) -> Result { - let reader = AsyncVvmEntryReader::open(path.as_ref()).await?; - let manifest = reader.read_vvm_json::("manifest.json").await?; - let metas = reader - .read_vvm_json::(manifest.metas_filename()) - .await?; - let id = VoiceModelId::new(nanoid!()); - - Ok(Self { - header: VoiceModelHeader { - id, - metas, - manifest, - path: path.as_ref().into(), - }, - }) - } - - /// ID。 - pub fn id(&self) -> &VoiceModelId { - &self.header.id - } - - /// メタ情報。 - pub fn metas(&self) -> &VoiceModelMeta { - &self.header.metas - } - - pub(crate) fn header(&self) -> &VoiceModelHeader { - &self.header - } -} - -struct AsyncVvmEntry { - index: usize, - entry: async_zip::ZipEntry, -} - -#[derive(new)] -struct AsyncVvmEntryReader { - reader: async_zip::read::fs::ZipFileReader, - entry_map: 
HashMap, -} - -impl AsyncVvmEntryReader { - async fn open(path: &Path) -> LoadModelResult { - let reader = async_zip::read::fs::ZipFileReader::new(path) - .await - .map_err(|source| LoadModelError { - path: path.to_owned(), - context: LoadModelErrorKind::OpenZipFile, - source: Some(source.into()), - })?; - let entry_map: HashMap<_, _> = reader - .file() - .entries() - .iter() - .filter(|e| !e.entry().dir()) - .enumerate() - .map(|(i, e)| { - ( - e.entry().filename().to_string(), - AsyncVvmEntry { - index: i, - entry: e.entry().clone(), - }, - ) - }) - .collect(); - Ok(AsyncVvmEntryReader::new(reader, entry_map)) - } - async fn read_vvm_json(&self, filename: &str) -> LoadModelResult { - let bytes = self.read_vvm_entry(filename).await?; - serde_json::from_slice(&bytes).map_err(|source| LoadModelError { - path: self.reader.path().to_owned(), - context: LoadModelErrorKind::ReadZipEntry { - filename: filename.to_owned(), - }, - source: Some(source.into()), - }) - } - - async fn read_vvm_entry(&self, filename: &str) -> LoadModelResult> { - async { - let me = self - .entry_map - .get(filename) - .ok_or_else(|| io::Error::from(io::ErrorKind::NotFound))?; - let mut manifest_reader = self.reader.entry(me.index).await?; - let mut buf = Vec::with_capacity(me.entry.uncompressed_size() as usize); - manifest_reader - .read_to_end_checked(&mut buf, &me.entry) - .await?; - Ok::<_, anyhow::Error>(buf) - } - .await - .map_err(|source| LoadModelError { - path: self.reader.path().to_owned(), - context: LoadModelErrorKind::ReadZipEntry { - filename: filename.to_owned(), - }, - source: Some(source), - }) - } -} - // FIXME: "header"といいつつ、VVMのファイルパスを持っている状態になっている。 /// 音声モデルが持つ、各モデルファイルの実体を除く情報。 /// @@ -297,25 +58,289 @@ impl VoiceModelHeader { } pub(crate) mod blocking { - use super::VoiceModelHeader; + use std::{ + io::{self, Cursor}, + path::Path, + }; + + use enum_map::EnumMap; + use nanoid::nanoid; + use ouroboros::self_referencing; + use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _}; + use serde::de::DeserializeOwned; + + use crate::{ + error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, + infer::domain::InferenceOperationImpl, + manifest::Manifest, + VoiceModelMeta, + }; + + use super::{VoiceModelHeader, VoiceModelId}; /// 音声モデル。 /// /// VVMファイルと対応する。 #[derive(Clone)] pub struct VoiceModel { - pub(super) header: VoiceModelHeader, + header: VoiceModelHeader, + } + + impl self::VoiceModel { + pub(crate) fn read_inference_models( + &self, + ) -> LoadModelResult>> { + let reader = BlockingVvmEntryReader::open(&self.header.path)?; + + let model_bytes = [ + self.header.manifest.predict_duration_filename(), + self.header.manifest.predict_intonation_filename(), + self.header.manifest.decode_filename(), + ] + .into_par_iter() + .map(|filename| reader.read_vvm_entry(filename)) + .collect::, _>>()? 
+ .try_into() + .unwrap_or_else(|_| panic!("should be same length")); + + Ok(EnumMap::from_array(model_bytes)) + } + + /// VVMファイルから`VoiceModel`をコンストラクトする。 + pub fn from_path(path: impl AsRef) -> crate::Result { + let path = path.as_ref().to_owned(); + let reader = BlockingVvmEntryReader::open(&path)?; + let manifest = reader.read_vvm_json::("manifest.json")?; + let metas = reader.read_vvm_json(manifest.metas_filename())?; + let id = VoiceModelId::new(nanoid!()); + + Ok(Self { + header: VoiceModelHeader { + id, + metas, + manifest, + path, + }, + }) + } + + /// ID。 + pub fn id(&self) -> &VoiceModelId { + &self.header.id + } + + /// メタ情報。 + pub fn metas(&self) -> &VoiceModelMeta { + &self.header.metas + } + + pub(crate) fn header(&self) -> &VoiceModelHeader { + &self.header + } + } + + #[self_referencing] + struct BlockingVvmEntryReader { + path: std::path::PathBuf, + zip: Vec, + #[covariant] + #[borrows(zip)] + reader: zip::ZipArchive>, + } + + impl BlockingVvmEntryReader { + fn open(path: &Path) -> LoadModelResult { + (|| { + let zip = std::fs::read(path)?; + Self::try_new(path.to_owned(), zip, |zip| { + zip::ZipArchive::new(Cursor::new(zip)) + }) + })() + .map_err(|source| LoadModelError { + path: path.to_owned(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source.into()), + }) + } + + fn read_vvm_json(&self, filename: &str) -> LoadModelResult { + let bytes = &self.read_vvm_entry(filename)?; + serde_json::from_slice(bytes).map_err(|source| LoadModelError { + path: self.borrow_path().clone(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source.into()), + }) + } + + fn read_vvm_entry(&self, filename: &str) -> LoadModelResult> { + (|| { + let mut reader = self.borrow_reader().clone(); + let mut entry = reader.by_name(filename)?; + let mut buf = Vec::with_capacity(entry.size() as _); + io::copy(&mut entry, &mut buf)?; + Ok(buf) + })() + .map_err(|source| LoadModelError { + path: self.borrow_path().clone(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source), + }) + } } } pub(crate) mod tokio { - use super::VoiceModelHeader; + use std::{collections::HashMap, io, path::Path}; + + use derive_new::new; + use enum_map::EnumMap; + use futures::future::join3; + use nanoid::nanoid; + use serde::de::DeserializeOwned; + + use crate::{ + error::{LoadModelError, LoadModelErrorKind, LoadModelResult}, + infer::domain::InferenceOperationImpl, + manifest::Manifest, + Result, VoiceModelMeta, + }; + + use super::{VoiceModelHeader, VoiceModelId}; /// 音声モデル。 /// /// VVMファイルと対応する。 #[derive(Clone)] pub struct VoiceModel { - pub(super) header: VoiceModelHeader, + header: VoiceModelHeader, + } + + impl self::VoiceModel { + pub(crate) async fn read_inference_models( + &self, + ) -> LoadModelResult>> { + let reader = AsyncVvmEntryReader::open(&self.header.path).await?; + let ( + decode_model_result, + predict_duration_model_result, + predict_intonation_model_result, + ) = join3( + reader.read_vvm_entry(self.header.manifest.decode_filename()), + reader.read_vvm_entry(self.header.manifest.predict_duration_filename()), + reader.read_vvm_entry(self.header.manifest.predict_intonation_filename()), + ) + .await; + + Ok(EnumMap::from_array([ + predict_duration_model_result?, + predict_intonation_model_result?, + decode_model_result?, + ])) + } + /// VVMファイルから`VoiceModel`をコンストラクトする。 + pub async fn from_path(path: impl AsRef) -> Result { + let reader = AsyncVvmEntryReader::open(path.as_ref()).await?; + let manifest = reader.read_vvm_json::("manifest.json").await?; + let metas = 
reader + .read_vvm_json::(manifest.metas_filename()) + .await?; + let id = VoiceModelId::new(nanoid!()); + + Ok(Self { + header: VoiceModelHeader { + id, + metas, + manifest, + path: path.as_ref().into(), + }, + }) + } + + /// ID。 + pub fn id(&self) -> &VoiceModelId { + &self.header.id + } + + /// メタ情報。 + pub fn metas(&self) -> &VoiceModelMeta { + &self.header.metas + } + + pub(crate) fn header(&self) -> &VoiceModelHeader { + &self.header + } + } + + struct AsyncVvmEntry { + index: usize, + entry: async_zip::ZipEntry, + } + + #[derive(new)] + struct AsyncVvmEntryReader { + reader: async_zip::read::fs::ZipFileReader, + entry_map: HashMap, + } + + impl AsyncVvmEntryReader { + async fn open(path: &Path) -> LoadModelResult { + let reader = async_zip::read::fs::ZipFileReader::new(path) + .await + .map_err(|source| LoadModelError { + path: path.to_owned(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source.into()), + })?; + let entry_map: HashMap<_, _> = reader + .file() + .entries() + .iter() + .filter(|e| !e.entry().dir()) + .enumerate() + .map(|(i, e)| { + ( + e.entry().filename().to_string(), + AsyncVvmEntry { + index: i, + entry: e.entry().clone(), + }, + ) + }) + .collect(); + Ok(AsyncVvmEntryReader::new(reader, entry_map)) + } + async fn read_vvm_json(&self, filename: &str) -> LoadModelResult { + let bytes = self.read_vvm_entry(filename).await?; + serde_json::from_slice(&bytes).map_err(|source| LoadModelError { + path: self.reader.path().to_owned(), + context: LoadModelErrorKind::ReadZipEntry { + filename: filename.to_owned(), + }, + source: Some(source.into()), + }) + } + + async fn read_vvm_entry(&self, filename: &str) -> LoadModelResult> { + async { + let me = self + .entry_map + .get(filename) + .ok_or_else(|| io::Error::from(io::ErrorKind::NotFound))?; + let mut manifest_reader = self.reader.entry(me.index).await?; + let mut buf = Vec::with_capacity(me.entry.uncompressed_size() as usize); + manifest_reader + .read_to_end_checked(&mut buf, &me.entry) + .await?; + Ok::<_, anyhow::Error>(buf) + } + .await + .map_err(|source| LoadModelError { + path: self.reader.path().to_owned(), + context: LoadModelErrorKind::ReadZipEntry { + filename: filename.to_owned(), + }, + source: Some(source), + }) + } } } diff --git a/crates/voicevox_core_c_api/tests/e2e/log_mask.rs b/crates/voicevox_core_c_api/tests/e2e/log_mask.rs index ce364c1be..b28442bdb 100644 --- a/crates/voicevox_core_c_api/tests/e2e/log_mask.rs +++ b/crates/voicevox_core_c_api/tests/e2e/log_mask.rs @@ -23,7 +23,7 @@ impl Utf8Output { pub(crate) fn mask_windows_video_cards(self) -> Self { self.mask_stderr( static_regex!( - r#"(?m)^\{timestamp\} INFO voicevox_core::synthesizer: 検出されたGPU \(DirectMLには1番目のGPUが使われます\):(\n\{timestamp\} INFO voicevox_core::synthesizer: - "[^"]+" \([0-9.]+ [a-zA-Z]+\))+"#, + r#"(?m)^\{timestamp\} INFO voicevox_core::synthesizer::blocking: 検出されたGPU \(DirectMLには1番目のGPUが使われます\):(\n\{timestamp\} INFO voicevox_core::synthesizer::blocking: - "[^"]+" \([0-9.]+ [a-zA-Z]+\))+"#, ), "{windows-video-cards}", )
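The new `tokio` modules introduced above (for `Synthesizer`, `UserDict`, and `VoiceModel`) all follow the same shape: the async type is a cheap-to-clone facade over an `Arc` of its `blocking` counterpart, and every async method clones that `Arc`, moves owned copies of its arguments into a closure, and hands the CPU-bound call to `crate::task::asyncify`. The following is a minimal sketch of that shape only, not the actual implementation: it uses `tokio::task::spawn_blocking` as a stand-in for `crate::task::asyncify`, and the `Engine` type, its `run` method, and the `tokio_facade` module name are hypothetical, chosen just for illustration.

    mod blocking {
        /// Hypothetical CPU-bound engine; its methods must not be called
        /// directly on the async runtime's worker threads.
        pub struct Engine;

        impl Engine {
            pub fn run(&self, input: &str) -> String {
                input.to_uppercase()
            }
        }
    }

    mod tokio_facade {
        use std::sync::Arc;

        /// Async facade: a cheap-to-clone handle over the blocking engine.
        #[derive(Clone)]
        pub struct Engine(Arc<super::blocking::Engine>);

        impl Engine {
            pub fn new() -> Self {
                Self(Arc::new(super::blocking::Engine))
            }

            pub async fn run(&self, input: &str) -> String {
                // Clone the Arc and take owned arguments so the closure is
                // `'static`, as the blocking thread pool requires.
                let blocking = self.0.clone();
                let input = input.to_owned();
                // Offload the CPU-bound work so the async runtime stays responsive.
                tokio::task::spawn_blocking(move || blocking.run(&input))
                    .await
                    .expect("blocking task panicked")
            }
        }
    }

    // Usage sketch (assumes the `tokio` crate with the "full" feature set).
    #[tokio::main]
    async fn main() {
        let engine = tokio_facade::Engine::new();
        assert_eq!(engine.run("konnichiwa").await, "KONNICHIWA");
    }

Cloning the `Arc` rather than borrowing `self`, and converting `&str`/slice arguments to owned `String`/`Vec` values before the offload, is what lets each async wrapper method in the diff satisfy the `Send + 'static` bound on the blocking task while keeping the facade itself `Clone` and lock-free.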