Skip to content

Commit

Permalink
调整支持按模型分别定制限速
Browse files Browse the repository at this point in the history
  • Loading branch information
gcslaoli committed Oct 11, 2024
1 parent 5a8d618 commit 1f64980
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 117 deletions.
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ services:
ports:
- 9611:8080
environment:
LIMIT: 40 # 限制每个userToken允许的次数
PER: "3h" # 限制周期 1s, 1m, 1h, 1d, 1w, 1y
O1LIMIT: 10 # 限制每个userToken允许的O1模型次数
O1PER: "1w" # 限制周期 1s, 1m, 1h, 1d, 1w, 1y


OAIKEY: "" # OpenAI API key 用于内容审核
AUTO: "200/3h"
TEXT-DAVINCI-002-RENDER-SHA: "200/3h"
GPT-4O-MINI: "200/3h"
GPT-4O: "60/3h"
GPT-4: "20/3h"
GPT-4O-CANMORE: "30/3h"
O1-PREVIEW: "7/24h"
O1-MINI: "50/24h" # 模型名称: "次数/时间" 时间单位: h(小时) m(分钟) s(秒) 模型名称要改成大写

```

Expand Down
86 changes: 22 additions & 64 deletions api/audit_limit.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func AuditLimit(r *ghttp.Request) {
}

// OPENAI Moderation 检测
if config.OAIKEY != "" {
if config.OAIKEY != "" && prompt != "" {
// 检测是否包含违规内容
respVar := g.Client().SetHeaderMap(g.MapStrStr{
"Authorization": "Bearer " + config.OAIKEY,
Expand All @@ -75,71 +75,29 @@ func AuditLimit(r *ghttp.Request) {
return
}
}
// 先判断是否为o1模型
if config.O1Models.Contains(model) {
limiter := GetVisitor(token+"|o1model", config.O1LIMIT, config.O1PER)
// 获取剩余次数
remain := limiter.TokensAt(time.Now())
g.Log().Debug(ctx, "remain", remain)
if remain < 1 {
r.Response.Status = 429
// resMsg := gjson.New(MsgO1429)
// 根据remain计算需要等待的时间
// 生产间隔
creatInterval := config.O1PER / time.Duration(config.O1LIMIT)
// 转换为秒
creatIntervalSec := float64(creatInterval.Seconds())
// 等待时间
wait := (1 - remain) * creatIntervalSec
g.Log().Debug(ctx, "wait", wait, "creatIntervalSec", creatIntervalSec)
// resMsg.Set("detail.clears_in", int(wait))
// r.Response.WriteJson(resMsg)
r.Response.WriteJson(g.Map{
// "detail:":"您已经触发使用频率限制,当前限制为 "+ gconv.String(config.O1LIMIT) + " 次/"+ gconv.String(config.O1PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
"detail": "You have triggered the usage frequency limit, the current limit is " + gconv.String(config.O1LIMIT) + " times/" + gconv.String(config.O1PER) + ", please wait " + gconv.String(int(wait)) + " seconds before trying again.\n" + "您已经触发使用频率限制,当前限制为 " + gconv.String(config.O1LIMIT) + " 次/" + gconv.String(config.O1PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
})
return
} else {
// 消耗一个令牌
limiter.Allow()
r.Response.Status = 200
return
}
limit, per, limiter, err := GetVisitorWithModel(ctx, token, model)
if err != nil {
g.Log().Error(ctx, "GetVisitorWithModel", err)
r.Response.Status = 500
r.Response.WriteJson(g.Map{
"detail": err.Error(),
})
return
}

// 判断模型是否为plus模型 如果是则使用plus模型的限制
// if gstr.HasPrefix(model, "gpt-4")&&model!="gpt-4o-mini"
if config.PlusModels.Contains(model) {
limiter := GetVisitor(token, config.LIMIT, config.PER)
// 获取剩余次数
remain := limiter.TokensAt(time.Now())
g.Log().Debug(ctx, "remain", remain)
if remain < 1 {
r.Response.Status = 429
// resMsg := gjson.New(MsgPlus429)
// 根据remain计算需要等待的时间
// 生产间隔
creatInterval := config.PER / time.Duration(config.LIMIT)
// 转换为秒
creatIntervalSec := float64(creatInterval.Seconds())
// 等待时间
wait := (1 - remain) * creatIntervalSec
g.Log().Debug(ctx, "wait", wait, "creatIntervalSec", creatIntervalSec)
// resMsg.Set("detail.clears_in", int(wait))
// r.Response.WriteJson(resMsg)
r.Response.WriteJson(g.Map{
// "detail:":"您已经触发使用频率限制,当前限制为 "+ gconv.String(config.LIMIT) + " 次/"+ gconv.String(config.PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
"detail": "You have triggered the usage frequency limit, the current limit is " + gconv.String(config.LIMIT) + " times/" + gconv.String(config.PER) + ", please wait " + gconv.String(int(wait)) + " seconds before trying again.\n" + "您已经触发使用频率限制,当前限制为 " + gconv.String(config.LIMIT) + " 次/" + gconv.String(config.PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
})
return
} else {
// 消耗一个令牌
limiter.Allow()
r.Response.Status = 200
return
}

// 获取剩余次数
remain := limiter.TokensAt(time.Now())
g.Log().Debug(ctx, token, model, "remain", remain, "limit", limit, "per", per)
if remain < 1 {
r.Response.Status = 429
delayFrom := limiter.Reserve().DelayFrom(time.Now())
g.Log().Debug(ctx, "delayFrom", delayFrom)
r.Response.WriteJson(g.Map{
"detail": "You have triggered the usage frequency limit of " + model + ", the current limit is " + gconv.String(limit) + " times/" + gconv.String(per) + ", please wait " + gconv.String(int(delayFrom.Seconds())) + " seconds before trying again.\n" + "您已经触发 " + model + " 使用频率限制,当前限制为 " + gconv.String(limit) + " 次/" + gconv.String(per) + ",请等待 " + gconv.String(int(delayFrom.Seconds())) + " 秒后再试.",
})
return
}
// 消耗一个令牌
limiter.Allow()

r.Response.Status = 200

Expand Down
36 changes: 29 additions & 7 deletions api/limit.go
Original file line number Diff line number Diff line change
@@ -1,52 +1,74 @@
package api

import (
	"fmt"
	"strings"
	"sync"
	"time"

	"auditlimit/config"

	"github.com/gogf/gf/v2/frame/g"
	"github.com/gogf/gf/v2/text/gstr"
	"github.com/gogf/gf/v2/util/gconv"
	"golang.org/x/time/rate"
)

// visitor pairs a rate limiter with the bookkeeping used to expire idle entries.
// Values are built with positional composite literals in GetVisitor, so the
// field order below must not change.
type visitor struct {
limiter *rate.Limiter // limiter handed out by GetVisitor for this key
lastSeen time.Time // refreshed on every GetVisitor call for this key
Per time.Duration // limit period; CleanupVisitors compares idle time against it
}

// Registry of limiters by key, together with the mutex that GetVisitor and
// CleanupVisitors hold while touching it.
var (
	visitors = map[string]*visitor{}
	mtx      sync.Mutex
)

// GetVisitor returns the rate limiter registered under the given key, creating
// and storing one on first use. A new limiter is built from
// rate.Every(per/limit) with a burst of limit. Every call on an existing entry
// refreshes its lastSeen timestamp. The lookup and insert run under mtx.
//
// NOTE(review): per/time.Duration(limit) is an integer division, so limit must
// be non-zero — confirm that every caller guarantees this.
// NOTE(review): several lines below appear in two variants (token-keyed and
// key-keyed); this looks like a rendered diff — confirm which variant is live.
func GetVisitor(token string, limit int, per time.Duration) *rate.Limiter {
func GetVisitor(key string, limit int, per time.Duration) *rate.Limiter {
mtx.Lock()
defer mtx.Unlock()

v, exists := visitors[token]
v, exists := visitors[key]
if !exists {
// First request for this key: create the limiter and remember it.
limiter := rate.NewLimiter(rate.Every(per/time.Duration(limit)), limit)
visitors[token] = &visitor{limiter, time.Now()}
visitors[key] = &visitor{limiter, time.Now(), per}
return limiter
}

// Known key: mark it as recently used and reuse its limiter.
v.lastSeen = time.Now()
return v.limiter
}

// GetVisitorWithModel resolves the rate limit configured for model and returns
// the limiter that tracks token's usage of that model.
//
// The model name is upper-cased and looked up as a configuration key (or
// environment variable) whose value has the form "<count>/<duration>", for
// example "60/3h". A missing key, or a value that does not contain exactly one
// "/", falls back to "40/3h". The duration must be accepted by
// time.ParseDuration.
//
// It returns the parsed limit and period together with the limiter stored under
// "<token>|<MODEL>". A non-nil error is returned when the duration cannot be
// parsed or when the count or the duration is not strictly positive; the other
// results are then zero values.
func GetVisitorWithModel(ctx g.Ctx, token, model string) (limit int, per time.Duration, limiter *rate.Limiter, err error) {
	const defaultCount, defaultPeriod = "40", "3h"

	model = gstr.ToUpper(model)
	spec := g.Cfg().MustGetWithEnv(ctx, model, defaultCount+"/"+defaultPeriod).String()
	parts := strings.Split(spec, "/")
	if len(parts) != 2 {
		parts = []string{defaultCount, defaultPeriod}
	}

	// Tolerate stray spaces such as "60 / 3h".
	countText := strings.TrimSpace(parts[0])
	periodText := strings.TrimSpace(parts[1])

	// gconv.Int yields 0 for non-numeric text. GetVisitor divides the period by
	// the count, so a zero count would panic there; reject it here instead.
	limit = gconv.Int(countText)
	if limit <= 0 {
		return 0, 0, nil, fmt.Errorf("rate limit for model %q: count %q must be a positive integer", model, countText)
	}

	per, err = time.ParseDuration(periodText)
	if err != nil {
		return 0, 0, nil, fmt.Errorf("rate limit for model %q: %w", model, err)
	}
	// rate.Every treats a non-positive interval as "no limit", which would
	// silently disable throttling for this model.
	if per <= 0 {
		return 0, 0, nil, fmt.Errorf("rate limit for model %q: period %q must be positive", model, periodText)
	}

	return limit, per, GetVisitor(token+"|"+model, limit, per), nil
}

// CleanupVisitors deletes registry entries whose lastSeen timestamp is older
// than the idle threshold, holding mtx for the whole sweep.
//
// NOTE(review): two threshold conditions appear below (the global config.PER
// and the entry's own Per); this looks like a rendered diff — confirm which
// one is live.
func CleanupVisitors() {
mtx.Lock()
defer mtx.Unlock()

for token, v := range visitors {
if time.Since(v.lastSeen) > config.PER {
if time.Since(v.lastSeen) > v.Per {
delete(visitors, token)
}
}
}

func init() {
// 每小时清理一次
// 每星期清理一次
go func() {
for {
time.Sleep(1 * time.Hour)
time.Sleep(time.Hour * 24 * 7)
CleanupVisitors()
}
}()
Expand Down
21 changes: 21 additions & 0 deletions api/limit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package api_test

import (
"auditlimit/api"
"testing"

"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/os/gctx"
)

// TestGetVisitorWithModel checks that resolving a model name yields a usable
// limiter together with a positive limit and period.
func TestGetVisitorWithModel(t *testing.T) {
	ctx := gctx.New()
	limit, per, limiter, err := api.GetVisitorWithModel(ctx, "token", "text-davinci-002-render-sha")
	if err != nil {
		// Fail explicitly: logging and returning would report the test as passed.
		t.Fatalf("GetVisitorWithModel: %v", err)
	}
	if limiter == nil {
		t.Fatal("GetVisitorWithModel returned a nil limiter without an error")
	}
	if limit <= 0 || per <= 0 {
		t.Fatalf("unexpected rate limit: limit=%d per=%s", limit, per)
	}

	// Kept for manual inspection of the resolved configuration.
	g.Dump(limiter)
	g.Log().Info(ctx, "limit:", limit, "per:", per, "limiter:", limiter)
}
63 changes: 30 additions & 33 deletions config/config.go
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
package config

import (
"time"

"github.com/gogf/gf/v2/container/garray"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/os/gctx"
)

// Package-level settings. init overrides some of them from configuration or
// environment values.
// NOTE(review): several names are declared twice in this span; it looks like a
// rendered diff (old and new lines interleaved) — confirm which lines are live.
var (
PORT = 8080
PlusModels = garray.NewStrArrayFrom([]string{"gpt-4", "gpt-4o", "gpt-4-browsing", "gpt-4-plugins", "gpt-4-mobile", "gpt-4-code-interpreter", "gpt-4-dalle", "gpt-4-gizmo", "gpt-4-magic-create", "gpt-4o-canmore"})
O1Models = garray.NewStrArrayFrom([]string{"o1-preview", "o1-mini"})
ForbiddenWords = []string{} // forbidden words
LIMIT = 40 // request limit
PER = time.Hour * 3 // limit period
O1LIMIT = 5 // request limit
O1PER = time.Hour * 24 * 7 // limit period
OAIKEY = "" // OAIKEY
OAIKEYLOG = "" // OAIKEYLOG, hidden
PORT = 8080
// PlusModels = garray.NewStrArrayFrom([]string{"gpt-4", "gpt-4o", "gpt-4-browsing", "gpt-4-plugins", "gpt-4-mobile", "gpt-4-code-interpreter", "gpt-4-dalle", "gpt-4-gizmo", "gpt-4-magic-create", "gpt-4o-canmore"})
// O1Models = garray.NewStrArrayFrom([]string{"o1-preview", "o1-mini"})
ForbiddenWords = []string{} // forbidden words
// LIMIT = 40 // request limit
// PER = time.Hour * 3 // limit period
// O1LIMIT = 5 // request limit
// O1PER = time.Hour * 24 * 7 // limit period
OAIKEY = "" // OAIKEY
OAIKEYLOG = "" // OAIKEYLOG, hidden
// MODERATION = "https://api.openai.com/v1/moderations" // OpenAI Moderation check
MODERATION = "https://gateway.ai.cloudflare.com/v1/a8cace244ffbc233655fefeaca37d515/xyhelper/openai/moderations"
MODERATION = "https://gateway.ai.cloudflare.com/v1/040ac2002b4dd67637e97c628feb3484/xyhelper/openai/moderations"
)

func init() {
Expand All @@ -30,25 +27,25 @@ func init() {
PORT = port
}
g.Log().Info(ctx, "PORT:", PORT)
limit := g.Cfg().MustGetWithEnv(ctx, "LIMIT").Int()
if limit > 0 {
LIMIT = limit
}
g.Log().Info(ctx, "LIMIT:", LIMIT)
per := g.Cfg().MustGetWithEnv(ctx, "PER").Duration()
if per > 0 {
PER = per
}
g.Log().Info(ctx, "PER:", PER)
o1limit := g.Cfg().MustGetWithEnv(ctx, "O1LIMIT").Int()
if o1limit > 0 {
O1LIMIT = o1limit
}
g.Log().Info(ctx, "O1LIMIT:", O1LIMIT)
o1per := g.Cfg().MustGetWithEnv(ctx, "O1PER").Duration()
if o1per > 0 {
O1PER = o1per
}
// limit := g.Cfg().MustGetWithEnv(ctx, "LIMIT").Int()
// if limit > 0 {
// LIMIT = limit
// }
// g.Log().Info(ctx, "LIMIT:", LIMIT)
// per := g.Cfg().MustGetWithEnv(ctx, "PER").Duration()
// if per > 0 {
// PER = per
// }
// g.Log().Info(ctx, "PER:", PER)
// o1limit := g.Cfg().MustGetWithEnv(ctx, "O1LIMIT").Int()
// if o1limit > 0 {
// O1LIMIT = o1limit
// }
// g.Log().Info(ctx, "O1LIMIT:", O1LIMIT)
// o1per := g.Cfg().MustGetWithEnv(ctx, "O1PER").Duration()
// if o1per > 0 {
// O1PER = o1per
// }
oaikey := g.Cfg().MustGetWithEnv(ctx, "OAIKEY").String()
// oaikey 不为空
if oaikey != "" {
Expand Down
14 changes: 10 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
PORT: 9612
LIMIT: 2
PER: "1h"
OAIKEY: ""
MODERATION: "https://api.openai.com/v1/moderations"
OAIKEY: "" # OpenAI API key 用于内容审核
MODERATION: "https://api.openai.com/v1/moderations"
AUTO: "200/3h"
TEXT-DAVINCI-002-RENDER-SHA: "200/3h"
GPT-4O-MINI: "200/3h"
GPT-4O: "60/3h"
GPT-4: "20/3h"
GPT-4O-CANMORE: "30/3h"
O1-PREVIEW: "7/24h"
O1-MINI: "50/24h"
13 changes: 10 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@ services:
volumes:
- ./data:/app/data
environment:
LIMIT: 40 # 限制每个userToken允许的次数
PER: "3h" # 限制周期 1s, 1m, 1h, 1d, 1w, 1y

PORT: 9611
OAIKEY: "" # OpenAI API key 用于内容审核
AUTO: "200/3h"
TEXT-DAVINCI-002-RENDER-SHA: "200/3h"
GPT-4O-MINI: "200/3h"
GPT-4O: "60/3h"
GPT-4: "20/3h"
GPT-4O-CANMORE: "30/3h"
O1-PREVIEW: "7/24h"
O1-MINI: "50/24h"

0 comments on commit 1f64980

Please sign in to comment.