Skip to content

Commit

Permalink
Initial support of aivisspeech
Browse files Browse the repository at this point in the history
  • Loading branch information
h2yk committed Nov 20, 2024
1 parent 043f705 commit 4db3dea
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 7 deletions.
3 changes: 3 additions & 0 deletions config-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ voices:
coeiroink:
enabled: false
api:
aivisspeech:
enabled: false
api:
aquestalkproxy:
enabled: false
api:
Expand Down
8 changes: 6 additions & 2 deletions lib/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ type Config struct {
Enabled bool
Api string
}
AivisSpeech struct {
Enabled bool
Api string
}
AquestalkProxy struct {
Enabled bool
Api string
Expand Down Expand Up @@ -100,7 +104,7 @@ func init() {
log.Fatal("Config parse failed:", err)
}

//verify
// verify
if CurrentConfig.Debug {
log.Print("Debug is enabled")
}
Expand All @@ -120,7 +124,7 @@ func init() {
func VerifyGuild(guild *Guild) error {
val, exists := Lang[guild.Lang]
if !exists {
return errors.New("no such language") //Don't use nil val!
return errors.New("no such language") // Don't use nil val!
}
guilderrorstr := val.Error.Guild
if len(guild.Prefix) != 1 {
Expand Down
133 changes: 133 additions & 0 deletions lib/voices/aivisspeech.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package voices

import (
"bytes"
"encoding/json"
"errors"
"io"
"log"
"net/http"
"net/url"
"strconv"

"github.com/tpc3/Kotone-DiVE/lib/config"
)

// AivisSpeech is the package-wide AivisSpeech voice source. It is populated by
// this file's init function.
var AivisSpeech aivisspeech

// aivisspeech is the voice source backed by an AivisSpeech engine HTTP API.
type aivisspeech struct {
	// Info is the static description of this voice source, returned by GetInfo.
	Info VoiceInfo
	// Speakers holds the speaker list decoded from the engine's /speakers
	// endpoint during init; Synth and Verify search it for a voice name.
	Speakers Speakers
	// Request is the POST request to the engine's /synthesis endpoint that is
	// built once in init and used by Synth as the basis of each call.
	Request *http.Request
}

// The following types are reused from the voicevox implementation:
// type Speakers struct
// type Speaker struct
// type Style struct

// AivisSpeech is another engine implementation based on voicevox.
// It uses an almost identical API, but does not provide an official Docker image (there is a Dockerfile, though).

// init registers the AivisSpeech voice source.
//
// The static voice information is always filled in. When the engine is
// enabled in the configuration, the speaker list is fetched from the engine's
// /speakers endpoint and a /synthesis request template is prepared for Synth.
// Any failure while contacting the engine or decoding its answer terminates
// the process, because the voice source would be unusable afterwards.
func init() {
	AivisSpeech = aivisspeech{
		Info: VoiceInfo{
			Type:             "aivisspeech",
			Format:           "pcm",
			Container:        "wav",
			ReEncodeRequired: true,
			Enabled:          config.CurrentConfig.Voices.AivisSpeech.Enabled,
		},
	}
	if !config.CurrentConfig.Voices.AivisSpeech.Enabled {
		log.Print("WARN: AivisSpeech is disabled")
		return
	}
	api := config.CurrentConfig.Voices.AivisSpeech.Api

	res, err := http.Get(api + "/speakers")
	if err != nil {
		log.Fatal("aivisspeech: fetching speakers: ", err)
	}
	defer res.Body.Close()
	bin, err := io.ReadAll(res.Body)
	if err != nil {
		log.Fatal("aivisspeech: reading speakers response: ", err)
	}
	// An error response must not be mistaken for a speaker list.
	if res.StatusCode != http.StatusOK {
		log.Fatalf("aivisspeech: fetching speakers: unexpected status %d: %s", res.StatusCode, bin)
	}
	// The endpoint answers with a bare JSON array, so decode it straight into
	// the slice field instead of wrapping the payload in a synthetic object.
	if err := json.Unmarshal(bin, &AivisSpeech.Speakers.Speakers); err != nil {
		log.Fatal("aivisspeech: decoding speakers: ", err)
	}

	request, err := http.NewRequest(http.MethodPost, api+"/synthesis", nil)
	if err != nil {
		log.Fatal("aivisspeech: building synthesis request: ", err)
	}
	AivisSpeech.Request = request
}

// Synth synthesizes content with the given voice and returns the audio bytes
// produced by the engine's /synthesis endpoint.
//
// voice is the concatenation of a speaker name and one of its style names.
// The first matching style is used. An error is returned when no style
// matches, when either HTTP call fails, or when the engine answers with a
// status other than 200.
func (voiceSource aivisspeech) Synth(content string, voice string) ([]byte, error) {
	id := -1
lookup:
	for _, speaker := range voiceSource.Speakers.Speakers {
		for _, v := range speaker.Styles {
			if speaker.Name+v.Name == voice {
				id = v.Id
				// Leave both loops: a plain break would only exit the inner one.
				break lookup
			}
		}
	}
	if id == -1 {
		return nil, errors.New("no such voice")
	}
	speakerID := strconv.Itoa(id)

	// Step 1: ask the engine for the audio query describing the utterance.
	queryRes, err := http.Post(config.CurrentConfig.Voices.AivisSpeech.Api+"/audio_query?speaker="+speakerID+"&text="+url.QueryEscape(content), "", nil)
	if err != nil {
		return nil, err
	}
	defer queryRes.Body.Close()
	if queryRes.StatusCode != http.StatusOK {
		return nil, errors.New("Response code error from aivisspeech:" + strconv.Itoa(queryRes.StatusCode))
	}
	query, err := io.ReadAll(queryRes.Body)
	if err != nil {
		return nil, err
	}

	// Step 2: post the audio query to /synthesis.
	// Clone deep-copies the URL and header map, so concurrent Synth calls do
	// not mutate the shared request template.
	req := voiceSource.Request.Clone(voiceSource.Request.Context())
	req.URL.RawQuery = "speaker=" + speakerID
	req.Body = io.NopCloser(bytes.NewReader(query))
	req.ContentLength = int64(len(query))
	// Hand out a fresh reader each time so a redirect or retry can replay the
	// payload instead of re-sending an already drained body.
	req.GetBody = func() (io.ReadCloser, error) {
		return io.NopCloser(bytes.NewReader(query)), nil
	}
	req.Header.Set("Content-Type", "application/json")

	synthRes, err := httpCli.Do(req)
	if err != nil {
		return nil, err
	}
	defer synthRes.Body.Close()
	bin, err := io.ReadAll(synthRes.Body)
	if err != nil {
		return nil, err
	}
	if synthRes.StatusCode != http.StatusOK {
		return nil, errors.New("Response code error from aivisspeech:" + strconv.Itoa(synthRes.StatusCode) + " " + string(bin))
	}
	return bin, nil
}

// Verify reports whether voice names a known speaker style. A voice is the
// speaker name immediately followed by the style name. It returns nil on a
// match and a "no such voice" error otherwise.
func (voiceSource aivisspeech) Verify(voice string) error {
	for _, sp := range voiceSource.Speakers.Speakers {
		for _, style := range sp.Styles {
			if candidate := sp.Name + style.Name; candidate != voice {
				continue
			}
			return nil
		}
	}
	return errors.New("no such voice")
}

// GetInfo returns the static description of the AivisSpeech voice source.
func (voiceSource aivisspeech) GetInfo() VoiceInfo {
	info := voiceSource.Info
	return info
}
13 changes: 8 additions & 5 deletions lib/voices/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@ package voices

import (
"errors"
"github.com/tpc3/Kotone-DiVE/lib/config"
"github.com/tpc3/Kotone-DiVE/lib/db"
"github.com/tpc3/Kotone-DiVE/lib/utils"
"hash/crc64"
"io"
"log"
"net/http"
"strconv"
"time"

"github.com/tpc3/Kotone-DiVE/lib/config"
"github.com/tpc3/Kotone-DiVE/lib/db"
"github.com/tpc3/Kotone-DiVE/lib/utils"

"github.com/bwmarrin/discordgo"
"github.com/patrickmn/go-cache"
)
Expand Down Expand Up @@ -63,6 +64,8 @@ func SourceSwitcher(source string) (VoiceSource, error) {
voiceSource = Voicevox
case Coeiroink.Info.Type:
voiceSource = Coeiroink
case AivisSpeech.Info.Type:
voiceSource = AivisSpeech
case AquestalkProxy.Info.Type:
voiceSource = AquestalkProxy
default:
Expand Down Expand Up @@ -102,7 +105,7 @@ func GetVoice(content string, voice *config.Voice) ([]byte, error) {
}

if bin == nil {
//Nothing to read
// Nothing to read
return nil, nil
}

Expand Down Expand Up @@ -139,7 +142,7 @@ func ReadVoice(session *discordgo.Session, orgMsg *discordgo.MessageCreate, enco
}

if session.VoiceConnections[orgMsg.GuildID] == nil {
return Skipped //Skipped due to the disconnection
return Skipped // Skipped due to the disconnection
}

frames, err := SplitToFrame(encoded)
Expand Down

0 comments on commit 4db3dea

Please sign in to comment.