From 4db3dea59020bdd70f7967f167c2f9d4b1e8c28d Mon Sep 17 00:00:00 2001 From: hayabusa2yk Date: Thu, 21 Nov 2024 06:34:58 +0900 Subject: [PATCH] Initial support of aivisspeech --- config-template.yaml | 3 + lib/config/config.go | 8 ++- lib/voices/aivisspeech.go | 133 ++++++++++++++++++++++++++++++++++++++ lib/voices/helper.go | 13 ++-- 4 files changed, 150 insertions(+), 7 deletions(-) create mode 100644 lib/voices/aivisspeech.go diff --git a/config-template.yaml b/config-template.yaml index 66c6cdb..af74311 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -37,6 +37,9 @@ voices: coeiroink: enabled: false api: + aivisspeech: + enabled: false + api: aquestalkproxy: enabled: false api: diff --git a/lib/config/config.go b/lib/config/config.go index ce9edc2..21c2927 100644 --- a/lib/config/config.go +++ b/lib/config/config.go @@ -55,6 +55,10 @@ type Config struct { Enabled bool Api string } + AivisSpeech struct { + Enabled bool + Api string + } AquestalkProxy struct { Enabled bool Api string @@ -100,7 +104,7 @@ func init() { log.Fatal("Config parse failed:", err) } - //verify + // verify if CurrentConfig.Debug { log.Print("Debug is enabled") } @@ -120,7 +124,7 @@ func init() { func VerifyGuild(guild *Guild) error { val, exists := Lang[guild.Lang] if !exists { - return errors.New("no such language") //Don't use nil val! + return errors.New("no such language") // Don't use nil val! 
}
 	guilderrorstr := val.Error.Guild
 	if len(guild.Prefix) != 1 {
diff --git a/lib/voices/aivisspeech.go b/lib/voices/aivisspeech.go
new file mode 100644
index 0000000..9f459e5
--- /dev/null
+++ b/lib/voices/aivisspeech.go
@@ -0,0 +1,161 @@
+package voices
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"strconv"
+
+	"github.com/tpc3/Kotone-DiVE/lib/config"
+)
+
+// AivisSpeech is the package-level AivisSpeech voice source; init fills it in.
+var AivisSpeech aivisspeech
+
+// aivisspeech holds the state of the AivisSpeech voice source.
+type aivisspeech struct {
+	// Info describes the engine; GetInfo returns it unchanged.
+	Info VoiceInfo
+	// Speakers is decoded from the engine's /speakers response in init.
+	Speakers Speakers
+	// Request is the template for /synthesis POST requests. Synth clones it
+	// for every call and never modifies the template itself.
+	Request *http.Request
+}
+
+// These structs are currently shared with the voicevox implementation:
+// type Speakers struct
+// type Speaker struct
+// type Style struct
+
+// AivisSpeech is another engine implementation based on voicevox.
+// They use an almost identical API, but AivisSpeech does not provide an
+// official Docker image (there is a Dockerfile, though).
+
+// init registers the AivisSpeech voice source. When the engine is enabled in
+// the configuration, it loads the speaker list from <Api>/speakers and builds
+// the /synthesis request template; any failure there ends the process via
+// log.Fatal.
+func init() {
+	AivisSpeech = aivisspeech{
+		Info: VoiceInfo{
+			Type:             "aivisspeech",
+			Format:           "pcm",
+			Container:        "wav",
+			ReEncodeRequired: true,
+			Enabled:          config.CurrentConfig.Voices.AivisSpeech.Enabled,
+		},
+	}
+	if !config.CurrentConfig.Voices.AivisSpeech.Enabled {
+		log.Print("WARN: AivisSpeech is disabled")
+		return
+	}
+	res, err := http.Get(config.CurrentConfig.Voices.AivisSpeech.Api + "/speakers")
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer res.Body.Close()
+	bin, err := io.ReadAll(res.Body)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Report an error status directly instead of letting the JSON decoder
+	// fail on (or silently accept) an error payload.
+	if res.StatusCode != http.StatusOK {
+		log.Fatal("Response code error from aivisspeech:" + strconv.Itoa(res.StatusCode) + " " + string(bin))
+	}
+	// Wrap the payload under a "speakers" key so it decodes into Speakers.
+	err = json.Unmarshal([]byte("{ \"speakers\": "+string(bin)+" }"), &AivisSpeech.Speakers)
+	if err != nil {
+		log.Fatal(err)
+	}
+	request, err := http.NewRequest(http.MethodPost, config.CurrentConfig.Voices.AivisSpeech.Api+"/synthesis", nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+	AivisSpeech.Request = request
+}
+
+// styleID returns the ID of the first style whose speaker name followed by its
+// style name equals voice. The boolean is false when no style matches.
+func (voiceSource aivisspeech) styleID(voice string) (int, bool) {
+	for _, speaker := range voiceSource.Speakers.Speakers {
+		for _, v := range speaker.Styles {
+			if speaker.Name+v.Name == voice {
+				return v.Id, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// Synth synthesizes content with the given voice and returns the bytes of the
+// engine's /synthesis response.
+//
+// It POSTs the text to /audio_query, then POSTs the returned query document to
+// /synthesis. It fails when the voice is unknown, when a request cannot be
+// completed, or when either endpoint answers with a status other than 200.
+func (voiceSource aivisspeech) Synth(content string, voice string) ([]byte, error) {
+	id, ok := voiceSource.styleID(voice)
+	if !ok {
+		return nil, errors.New("no such voice")
+	}
+	speaker := strconv.Itoa(id)
+
+	queryRes, err := http.Post(config.CurrentConfig.Voices.AivisSpeech.Api+"/audio_query?speaker="+speaker+"&text="+url.QueryEscape(content), "", nil)
+	if err != nil {
+		return nil, err
+	}
+	defer queryRes.Body.Close()
+	if queryRes.StatusCode != 200 {
+		return nil, errors.New("Response code error from aivisspeech:" + strconv.Itoa(queryRes.StatusCode))
+	}
+	query, err := io.ReadAll(queryRes.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	// Clone deep-copies the URL and header map. A plain struct copy would share
+	// both with the template, so concurrent calls would write to the same data.
+	req := voiceSource.Request.Clone(voiceSource.Request.Context())
+	req.URL.RawQuery = "speaker=" + speaker
+	req.Body = io.NopCloser(bytes.NewReader(query))
+	req.ContentLength = int64(len(query))
+	// Return a fresh reader on every call so the body can be replayed.
+	req.GetBody = func() (io.ReadCloser, error) {
+		return io.NopCloser(bytes.NewReader(query)), nil
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	synthRes, err := httpCli.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer synthRes.Body.Close()
+	bin, err := io.ReadAll(synthRes.Body)
+	if err != nil {
+		return nil, err
+	}
+	if synthRes.StatusCode != 200 {
+		return nil, errors.New("Response code error from aivisspeech:" + strconv.Itoa(synthRes.StatusCode) + " " + string(bin))
+	}
+	return bin, nil
+}
+
+// Verify reports whether voice names a known speaker style: nil when a style
+// matches, a "no such voice" error otherwise.
+func (voiceSource aivisspeech) Verify(voice string) error {
+	if _, ok := voiceSource.styleID(voice); !ok {
+		return errors.New("no such voice")
+	}
+	return nil
+}
+
+// GetInfo returns the Info value of this voice source.
+func (voiceSource aivisspeech) GetInfo() VoiceInfo {
+	return voiceSource.Info
+}
diff --git a/lib/voices/helper.go b/lib/voices/helper.go
index 54757d4..109854d 100644
--- a/lib/voices/helper.go
+++ b/lib/voices/helper.go
@@ -2,9 +2,6 @@ package voices
 
 import (
 	"errors"
-	"github.com/tpc3/Kotone-DiVE/lib/config"
-	"github.com/tpc3/Kotone-DiVE/lib/db"
-	"github.com/tpc3/Kotone-DiVE/lib/utils"
 	"hash/crc64"
 	"io"
 	"log"
@@ -12,6 +9,10 @@ import (
 	"strconv"
 	"time"
 
+	"github.com/tpc3/Kotone-DiVE/lib/config"
+	"github.com/tpc3/Kotone-DiVE/lib/db"
+
"github.com/tpc3/Kotone-DiVE/lib/utils" + "github.com/bwmarrin/discordgo" "github.com/patrickmn/go-cache" ) @@ -63,6 +64,8 @@ func SourceSwitcher(source string) (VoiceSource, error) { voiceSource = Voicevox case Coeiroink.Info.Type: voiceSource = Coeiroink + case AivisSpeech.Info.Type: + voiceSource = AivisSpeech case AquestalkProxy.Info.Type: voiceSource = AquestalkProxy default: @@ -102,7 +105,7 @@ func GetVoice(content string, voice *config.Voice) ([]byte, error) { } if bin == nil { - //Nothing to read + // Nothing to read return nil, nil } @@ -139,7 +142,7 @@ func ReadVoice(session *discordgo.Session, orgMsg *discordgo.MessageCreate, enco } if session.VoiceConnections[orgMsg.GuildID] == nil { - return Skipped //Skipped due to the disconnection + return Skipped // Skipped due to the disconnection } frames, err := SplitToFrame(encoded)