From 2024302316f52a4ec06f2bb010226560c8ad34c9 Mon Sep 17 00:00:00 2001
From: Deng Junhai
Date: Sat, 22 Jun 2024 01:06:27 +0800
Subject: [PATCH 1/4] chore: Define sending defaults based on different device types (#204)

---
 app/src/store/settings.ts | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/src/store/settings.ts b/app/src/store/settings.ts
index da14476a..a03d18e7 100644
--- a/app/src/store/settings.ts
+++ b/app/src/store/settings.ts
@@ -6,13 +6,14 @@ import {
   setNumberMemory,
 } from "@/utils/memory.ts";
 import { RootState } from "@/store/index.ts";
+import { isMobile } from "@/utils/device";
-export const sendKeys = ["Ctrl + Enter", "Enter"];
+export const sendKeys = isMobile() ? ["Ctrl + Enter", "Enter"] : ["Enter", "Ctrl + Enter"];
 
 export const initialSettings = {
   context: true,
   align: false,
   history: 8,
-  sender: false,
+  sender: isMobile(), // defaults to true (Enter) on mobile and false (Ctrl + Enter) on desktop
   max_tokens: 2000,
   temperature: 0.6,
   top_p: 1,
From 4c3843b3be3a402be5937e88cdcc802bcc547836 Mon Sep 17 00:00:00 2001
From: Deng Junhai
Date: Sat, 22 Jun 2024 01:23:27 +0800
Subject: [PATCH 2/4] feat: update and optimize tokenizer performance (#191)

Co-Authored-By: Minghan Zhang <112773885+zmh-program@users.noreply.github.com>
---
 globals/logger.go  |  5 +++--
 utils/buffer.go    | 11 +++--------
 utils/tokenizer.go | 30 ++++++++++++++++++------------
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/globals/logger.go b/globals/logger.go
index 6f1eeb51..70ea0149 100644
--- a/globals/logger.go
+++ b/globals/logger.go
@@ -2,10 +2,11 @@ package globals
 
 import (
 	"fmt"
+	"strings"
+
 	"github.com/natefinch/lumberjack"
 	"github.com/sirupsen/logrus"
 	"github.com/spf13/viper"
-	"strings"
 )
 
 const DefaultLoggerFile = "chatnio.log"
@@ -25,7 +26,7 @@ func (l *AppLogger) Format(entry *logrus.Entry) ([]byte, error) {
 	)
 
 	if !viper.GetBool("log.ignore_console") {
-		fmt.Println(data)
+		fmt.Print(data)
 	}
 
 	return []byte(data), nil
diff --git a/utils/buffer.go b/utils/buffer.go
index 00e3e82e..99b409fe 100644
--- a/utils/buffer.go
+++ b/utils/buffer.go
@@ -197,11 +197,6 @@ func (b *Buffer) IsFunctionCalling() bool {
 	return b.FunctionCall != nil || b.ToolCalls != nil
 }
 
-func (b *Buffer) WriteBytes(data []byte) []byte {
-	b.Write(string(data))
-	return data
-}
-
 func (b *Buffer) IsEmpty() bool {
 	return b.Cursor == 0 && !b.IsFunctionCalling()
 }
@@ -237,12 +232,12 @@ func (b *Buffer) SetInputTokens(tokens int) {
 	b.InputTokens = tokens
 }
 
-func (b *Buffer) CountInputToken() int {
-	return b.InputTokens * GetWeightByModel(b.Model)
-}
+func (b *Buffer) CountInputToken() int {
+	return b.InputTokens
+}
 
 func (b *Buffer) CountOutputToken() int {
 	return b.ReadTimes() * GetWeightByModel(b.Model)
 }
 
 func (b *Buffer) CountToken() int {
diff --git a/utils/tokenizer.go b/utils/tokenizer.go
index de38c010..7784f7dd 100644
--- a/utils/tokenizer.go
+++ b/utils/tokenizer.go
@@ -3,8 +3,9 @@ package utils
 import (
 	"chat/globals"
 	"fmt"
-	"github.com/pkoukk/tiktoken-go"
 	"strings"
+
+	"github.com/pkoukk/tiktoken-go"
 )
 
 // Using https://github.com/pkoukk/tiktoken-go
@@ -45,9 +46,10 @@ func GetWeightByModel(model string) int {
 		}
 	}
 }
-func NumTokensFromMessages(messages []globals.Message, model string) (tokens int) {
+func NumTokensFromMessages(messages []globals.Message, model string, responseType bool) (tokens int) {
 	tokensPerMessage := GetWeightByModel(model)
 	tkm, err := tiktoken.EncodingForModel(model)
+
 	if err != nil { // the method above was deprecated, use the recall method instead
 		// can not encode messages, use length of messages as a proxy for number of tokens
@@ -59,16 +61,20 @@ func NumTokensFromMessages(messages []globals.Message, model string) (tokens int
 		if globals.DebugMode {
 			globals.Debug(fmt.Sprintf("[tiktoken] error encoding messages: %s (model: %s), using default model instead", err, model))
 		}
-		return NumTokensFromMessages(messages, globals.GPT3Turbo0613)
+		return NumTokensFromMessages(messages, globals.GPT3Turbo0613, responseType)
 	}
 
 	for _, message := range messages {
-		tokens +=
-			len(tkm.Encode(message.Content, nil, nil)) +
-				len(tkm.Encode(message.Role, nil, nil)) +
-				tokensPerMessage
+		tokens += len(tkm.Encode(message.Content, nil, nil))
+
+		if !responseType {
+			tokens += len(tkm.Encode(message.Role, nil, nil)) + tokensPerMessage
+		}
+	}
+
+	if !responseType {
+		tokens += 3 // every reply is primed with <|start|>assistant<|message|>
 	}
-	tokens += 3 // every reply is primed with <|start|>assistant<|message|>
 
 	if globals.DebugMode {
 		globals.Debug(fmt.Sprintf("[tiktoken] num tokens from messages: %d (tokens per message: %d, model: %s)", tokens, tokensPerMessage, model))
@@ -76,8 +82,8 @@ func NumTokensFromMessages(messages []globals.Message, model string) (tokens int
 	return tokens
 }
 
-func CountTokenPrice(messages []globals.Message, model string) int {
-	return NumTokensFromMessages(messages, model) * GetWeightByModel(model)
+func NumTokensFromResponse(response string, model string) int {
+	return NumTokensFromMessages([]globals.Message{{Content: response}}, model, true)
 }
 
 func CountInputQuota(charge Charge, token int) float32 {
@@ -88,10 +94,10 @@ func CountInputQuota(charge Charge, token int) float32 {
 	return 0
 }
 
-func CountOutputToken(charge Charge, model string, token int) float32 {
+func CountOutputToken(charge Charge, token int) float32 {
 	switch charge.GetType() {
 	case globals.TokenBilling:
-		return float32(token*GetWeightByModel(model)) / 1000 * charge.GetOutput()
+		return float32(token) / 1000 * charge.GetOutput()
 	case globals.TimesBilling:
 		return charge.GetOutput()
 	default:
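Reviewer note on PATCH 2 (the tokenizer change above, which PATCH 3 below carries as well): NumTokensFromMessages now takes a responseType flag. Prompt-side counting keeps the per-message overhead (role tokens plus tokensPerMessage, plus the 3 tokens that prime the assistant reply), while response-side counting charges content tokens only. The sketch below is illustrative rather than authoritative: message stands in for globals.Message, and encode is a whitespace stand-in for tiktoken's Encode, so it reproduces the counting scheme but not real token counts.

package main

import (
	"fmt"
	"strings"
)

// message mirrors the role/content shape of globals.Message.
type message struct {
	Role    string
	Content string
}

// encode is a whitespace stand-in for tiktoken's Encode, just to keep the
// sketch self-contained; real counts come from the model's BPE vocabulary.
func encode(s string) []string {
	return strings.Fields(s)
}

// numTokens follows the scheme the patch implements: prompt-side counting
// (responseType == false) charges role tokens and a fixed per-message
// overhead, plus 3 tokens that prime the assistant reply; response-side
// counting charges content tokens only.
func numTokens(msgs []message, tokensPerMessage int, responseType bool) (tokens int) {
	for _, m := range msgs {
		tokens += len(encode(m.Content))

		if !responseType {
			tokens += len(encode(m.Role)) + tokensPerMessage
		}
	}

	if !responseType {
		tokens += 3 // every reply is primed with <|start|>assistant<|message|>
	}

	return tokens
}

func main() {
	history := []message{
		{Role: "system", Content: "You are a helpful assistant."},
		{Role: "user", Content: "Hello there"},
	}
	fmt.Println("prompt-side:", numTokens(history, 3, false))
	fmt.Println("response-side:", numTokens([]message{{Content: "Hi, how can I help?"}}, 3, true))
}

The asymmetry is the point of the change: a response is a single stretch of content with no role framing, so charging it the per-message overhead would systematically overcount.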
From c81b599e907a79ff46c9e6c5551844e4490309f2 Mon Sep 17 00:00:00 2001
From: Deng Junhai
Date: Sat, 22 Jun 2024 01:59:27 +0800
Subject: [PATCH 3/4] feat: update and optimize tokenizer performance (#191)

Co-Authored-By: Minghan Zhang <112773885+zmh-program@users.noreply.github.com>
---
 globals/logger.go           |  5 +++--
 manager/chat_completions.go |  4 ++--
 utils/buffer.go             | 21 +++++++++++----------
 utils/tokenizer.go          | 30 ++++++++++++++++++------------
 4 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/globals/logger.go b/globals/logger.go
index 6f1eeb51..70ea0149 100644
--- a/globals/logger.go
+++ b/globals/logger.go
@@ -2,10 +2,11 @@ package globals
 
 import (
 	"fmt"
+	"strings"
+
 	"github.com/natefinch/lumberjack"
 	"github.com/sirupsen/logrus"
 	"github.com/spf13/viper"
-	"strings"
 )
 
 const DefaultLoggerFile = "chatnio.log"
@@ -25,7 +26,7 @@ func (l *AppLogger) Format(entry *logrus.Entry) ([]byte, error) {
 	)
 
 	if !viper.GetBool("log.ignore_console") {
-		fmt.Println(data)
+		fmt.Print(data)
 	}
 
 	return []byte(data), nil
diff --git a/manager/chat_completions.go b/manager/chat_completions.go
index ed7152ed..c9b887ca 100644
--- a/manager/chat_completions.go
+++ b/manager/chat_completions.go
@@ -154,7 +154,7 @@ func sendTranshipmentResponse(c *gin.Context, form RelayForm, messages []globals
 		},
 		Usage: Usage{
 			PromptTokens:     buffer.CountInputToken(),
-			CompletionTokens: buffer.CountOutputToken(),
+			CompletionTokens: buffer.CountOutputToken(false),
 			TotalTokens:      buffer.CountToken(),
 		},
 		Quota: utils.Multi[*float32](form.Official, nil, utils.ToPtr(buffer.GetQuota())),
@@ -205,7 +205,7 @@ func getStreamTranshipmentForm(id string, created int64, form RelayForm, data *g
 		},
 		Usage: Usage{
 			PromptTokens:     buffer.CountInputToken(),
-			CompletionTokens: buffer.CountOutputToken(),
+			CompletionTokens: buffer.CountOutputToken(true),
 			TotalTokens:      buffer.CountToken(),
 		},
 		Quota: utils.Multi[*float32](form.Official, nil, utils.ToPtr(buffer.GetQuota())),
diff --git a/utils/buffer.go b/utils/buffer.go
index 00e3e82e..fe0af4bf 100644
--- a/utils/buffer.go
+++ b/utils/buffer.go
@@ -57,7 +57,7 @@ func initInputToken(model string, history []globals.Message) int {
 		})
 	}
 
-	return CountTokenPrice(history, model)
+	return NumTokensFromMessages(history, model, false)
 }
 
 func NewBuffer(model string, history []globals.Message, charge Charge) *Buffer {
@@ -79,7 +79,7 @@ func (b *Buffer) GetCursor() int {
 }
 
 func (b *Buffer) GetQuota() float32 {
-	return b.Quota + CountOutputToken(b.Charge, b.Model, b.ReadTimes())
+	return b.Quota + CountOutputToken(b.Charge, b.CountOutputToken(false))
 }
 
 func (b *Buffer) Write(data string) string {
@@ -197,11 +197,6 @@ func (b *Buffer) IsFunctionCalling() bool {
 	return b.FunctionCall != nil || b.ToolCalls != nil
 }
 
-func (b *Buffer) WriteBytes(data []byte) []byte {
-	b.Write(string(data))
-	return data
-}
-
 func (b *Buffer) IsEmpty() bool {
 	return b.Cursor == 0 && !b.IsFunctionCalling()
 }
@@ -241,10 +236,16 @@ func (b *Buffer) CountInputToken() int {
 	return b.InputTokens
 }
 
-func (b *Buffer) CountOutputToken() int {
-	return b.ReadTimes() * GetWeightByModel(b.Model)
+func (b *Buffer) CountOutputToken(running bool) int {
+	if running {
+		// performance optimization:
+		// if the buffer is still running, the output tokens are estimated from the number of reads instead
+		return b.Times
+	}
+
+	return NumTokensFromResponse(b.Read(), b.Model)
 }
 
 func (b *Buffer) CountToken() int {
-	return b.CountInputToken() + b.CountOutputToken()
+	return b.CountInputToken() + b.CountOutputToken(false)
 }
diff --git a/utils/tokenizer.go b/utils/tokenizer.go
index de38c010..7784f7dd 100644
--- a/utils/tokenizer.go
+++ b/utils/tokenizer.go
@@ -3,8 +3,9 @@ package utils
 import (
	"chat/globals"
 	"fmt"
-	"github.com/pkoukk/tiktoken-go"
 	"strings"
+
+	"github.com/pkoukk/tiktoken-go"
 )
 
 // Using https://github.com/pkoukk/tiktoken-go
@@ -45,9 +46,10 @@ func GetWeightByModel(model string) int {
 		}
 	}
 }
-func NumTokensFromMessages(messages []globals.Message, model string) (tokens int) {
+func NumTokensFromMessages(messages []globals.Message, model string, responseType bool) (tokens int) {
 	tokensPerMessage := GetWeightByModel(model)
 	tkm, err := tiktoken.EncodingForModel(model)
+
 	if err != nil { // the method above was deprecated, use the recall method instead
 		// can not encode messages, use length of messages as a proxy for number of tokens
@@ -59,16 +61,20 @@ func NumTokensFromMessages(messages []globals.Message, model string) (tokens int
 		if globals.DebugMode {
 			globals.Debug(fmt.Sprintf("[tiktoken] error encoding messages: %s (model: %s), using default model instead", err, model))
 		}
-		return NumTokensFromMessages(messages, globals.GPT3Turbo0613)
+		return NumTokensFromMessages(messages, globals.GPT3Turbo0613, responseType)
 	}
 
 	for _, message := range messages {
-		tokens +=
-			len(tkm.Encode(message.Content, nil, nil)) +
-				len(tkm.Encode(message.Role, nil, nil)) +
-				tokensPerMessage
+		tokens += len(tkm.Encode(message.Content, nil, nil))
+
+		if !responseType {
+			tokens += len(tkm.Encode(message.Role, nil, nil)) + tokensPerMessage
+		}
+	}
+
+	if !responseType {
+		tokens += 3 // every reply is primed with <|start|>assistant<|message|>
 	}
-	tokens += 3 // every reply is primed with <|start|>assistant<|message|>
 
 	if globals.DebugMode {
 		globals.Debug(fmt.Sprintf("[tiktoken] num tokens from messages: %d (tokens per message: %d, model: %s)", tokens, tokensPerMessage, model))
@@ -76,8 +82,8 @@ func NumTokensFromMessages(messages []globals.Message, model string) (tokens int
 	return tokens
 }
 
-func CountTokenPrice(messages []globals.Message, model string) int {
-	return NumTokensFromMessages(messages, model) * GetWeightByModel(model)
+func NumTokensFromResponse(response string, model string) int {
+	return NumTokensFromMessages([]globals.Message{{Content: response}}, model, true)
 }
 
 func CountInputQuota(charge Charge, token int) float32 {
@@ -88,10 +94,10 @@ func CountInputQuota(charge Charge, token int) float32 {
 	return 0
 }
 
-func CountOutputToken(charge Charge, model string, token int) float32 {
+func CountOutputToken(charge Charge, token int) float32 {
 	switch charge.GetType() {
 	case globals.TokenBilling:
-		return float32(token*GetWeightByModel(model)) / 1000 * charge.GetOutput()
+		return float32(token) / 1000 * charge.GetOutput()
 	case globals.TimesBilling:
 		return charge.GetOutput()
 	default:
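Reviewer note on PATCH 3: re-tokenizing the whole buffer after every streamed chunk would make per-chunk accounting quadratic in the response length. The patch instead returns the write count (b.Times) as a rough estimate while the stream is running, and reserves the real tokenizer for the single final count. Below is a reduced sketch of that trade-off; buffer stands in for utils.Buffer and countExact for NumTokensFromResponse, and the one-token-per-chunk estimate is an assumption that only holds roughly, for providers that stream about one token per delta.

package main

import (
	"fmt"
	"strings"
)

// buffer is a reduced stand-in for utils.Buffer: it accumulates streamed
// chunks and remembers how many writes have happened.
type buffer struct {
	data  strings.Builder
	times int
}

func (b *buffer) write(chunk string) {
	b.data.WriteString(chunk)
	b.times++
}

// countExact is a toy stand-in for NumTokensFromResponse; a real
// implementation would run the model's tokenizer over the text.
func countExact(s string) int {
	return len(strings.Fields(s))
}

// countOutputToken mirrors the patch: while the stream is running, the
// write count is a cheap O(1) proxy for the token count; once finished,
// the full text is tokenized once for the exact figure.
func (b *buffer) countOutputToken(running bool) int {
	if running {
		return b.times
	}

	return countExact(b.data.String())
}

func main() {
	b := &buffer{}
	for _, chunk := range []string{"Hello", ", ", "streaming", " world"} {
		b.write(chunk)
		fmt.Println("running estimate:", b.countOutputToken(true))
	}

	fmt.Println("final count:", b.countOutputToken(false))
}

Note that in the patch itself, GetQuota and the non-stream Usage block both take the exact path (running == false), so billing rests on the accurate count; only the transient per-chunk usage reports in the stream rely on the estimate.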
From 78ff6a19a4c828ccdf24f1bd9c890e1b90432c68 Mon Sep 17 00:00:00 2001
From: Deng Junhai
Date: Sat, 22 Jun 2024 02:36:00 +0800
Subject: [PATCH 4/4] fix: fix tool calls `required` omitempty field

Co-Authored-By: Minghan Zhang <112773885+zmh-program@users.noreply.github.com>
---
 adapter/skylark/formatter.go | 11 +++++++----
 globals/tools.go             |  5 +++--
 utils/tokenizer.go           |  4 ++++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/adapter/skylark/formatter.go b/adapter/skylark/formatter.go
index 6e3cab39..38db6955 100644
--- a/adapter/skylark/formatter.go
+++ b/adapter/skylark/formatter.go
@@ -3,6 +3,7 @@ package skylark
 import (
 	"chat/globals"
 	"chat/utils"
+
 	structpb "github.com/golang/protobuf/ptypes/struct"
 	"github.com/volcengine/volc-sdk-golang/service/maas/models/api"
 )
@@ -20,19 +21,21 @@ func getFunctionCall(calls *globals.ToolCalls) *api.FunctionCall {
 }
 
 func getType(p globals.ToolProperty) string {
-	if p.Type == nil {
+	t, ok := p["type"]
+	if !ok {
 		return "string"
 	}
 
-	return *p.Type
+	return t.(string)
 }
 
 func getDescription(p globals.ToolProperty) string {
-	if p.Description == nil {
+	desc, ok := p["description"]
+	if !ok {
 		return ""
 	}
 
-	return *p.Description
+	return desc.(string)
 }
 
 func getValue(p globals.ToolProperty) *structpb.Value {
diff --git a/globals/tools.go b/globals/tools.go
index 364964f1..368361ee 100644
--- a/globals/tools.go
+++ b/globals/tools.go
@@ -16,7 +16,7 @@ type ToolFunction struct {
 type ToolParameters struct {
 	Type       string         `json:"type"`
 	Properties ToolProperties `json:"properties"`
-	Required   []string       `json:"required"`
+	Required   *[]string      `json:"required,omitempty"`
 }
 
 type ToolProperties map[string]ToolProperty
@@ -25,7 +25,8 @@ type ToolProperties map[string]ToolProperty
 type JsonSchemaType any
 type JSONSchemaDefinition any
 
-type ToolProperty struct {
+type ToolProperty map[string]interface{}
+type DetailToolProperty struct {
 	Type        *string            `json:"type,omitempty"`
 	Enum        *[]JsonSchemaType  `json:"enum,omitempty"`
 	Const       *JsonSchemaType    `json:"const,omitempty"`
diff --git a/utils/tokenizer.go b/utils/tokenizer.go
index 7784f7dd..5001ae98 100644
--- a/utils/tokenizer.go
+++ b/utils/tokenizer.go
@@ -83,6 +83,10 @@ func NumTokensFromMessages(messages []globals.Message, model string, responseTyp
 }
 
 func NumTokensFromResponse(response string, model string) int {
+	if len(response) == 0 {
+		return 0
+	}
+
 	return NumTokensFromMessages([]globals.Message{{Content: response}}, model, true)
 }
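Closing reviewer note on PATCH 4: with the previous `Required []string` and no omitempty, an unset field marshals as "required":null, which stricter upstream APIs reject; the new `*[]string` with omitempty drops the key entirely when the pointer is nil, while still permitting an explicit (even empty) list. A self-contained sketch of that distinction, with toolParameters as a reduced stand-in for globals.ToolParameters:

package main

import (
	"encoding/json"
	"fmt"
)

// toolParameters is a reduced stand-in for globals.ToolParameters.
type toolParameters struct {
	Type     string    `json:"type"`
	Required *[]string `json:"required,omitempty"`
}

func main() {
	// Unset: the pointer is nil, so omitempty drops the key entirely.
	unset, _ := json.Marshal(toolParameters{Type: "object"})
	fmt.Println(string(unset)) // {"type":"object"}

	// Set: the key is emitted, including the explicit empty-list case.
	req := []string{"location"}
	set, _ := json.Marshal(toolParameters{Type: "object", Required: &req})
	fmt.Println(string(set)) // {"type":"object","required":["location"]}

	// For contrast, a plain slice without omitempty marshals nil as null.
	type legacy struct {
		Required []string `json:"required"`
	}
	old, _ := json.Marshal(legacy{})
	fmt.Println(string(old)) // {"required":null}
}

One caveat worth flagging for a follow-up: getType and getDescription now use unchecked type assertions (t.(string), desc.(string)) on the ToolProperty map, which will panic if a schema supplies a non-string value for those keys; a comma-ok assertion with a fallback would be the defensive variant.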