调整支持按模型分别定制限速

cockroachai · Oct 11, 2024 · 1f64980 · 1f64980
1 parent 5a8d618
commit 1f64980
Show file tree

Hide file tree

Showing 7 changed files with 131 additions and 117 deletions.
diff --git a/README.md b/README.md
@@ -17,12 +17,15 @@ services:
     ports:
       - 9611:8080
     environment:
-      LIMIT: 40  # 限制每个userToken允许的次数
-      PER: "3h" # 限制周期 1s, 1m, 1h, 1d, 1w, 1y
-      O1LIMIT: 10  # 限制每个userToken允许的O1模型次数
-      O1PER: "1w" # 限制周期 1s, 1m, 1h, 1d, 1w, 1y
-
-
+      OAIKEY: "" # OpenAI API key 用于内容审核
+      AUTO: "200/3h"
+      TEXT-DAVINCI-002-RENDER-SHA: "200/3h"
+      GPT-4O-MINI: "200/3h"
+      GPT-4O: "60/3h"
+      GPT-4: "20/3h"
+      GPT-4O-CANMORE: "30/3h"
+      O1-PREVIEW: "7/24h"
+      O1-MINI: "50/24h" # 模型名称: "次数/时间" 时间单位: h(小时) m(分钟) s(秒)  模型名称要改成大写
 
 ```
 

diff --git a/api/audit_limit.go b/api/audit_limit.go
@@ -53,7 +53,7 @@ func AuditLimit(r *ghttp.Request) {
 	}
 
 	// OPENAI Moderation 检测
-	if config.OAIKEY != "" {
+	if config.OAIKEY != "" && prompt != "" {
 		// 检测是否包含违规内容
 		respVar := g.Client().SetHeaderMap(g.MapStrStr{
 			"Authorization": "Bearer " + config.OAIKEY,
@@ -75,71 +75,29 @@ func AuditLimit(r *ghttp.Request) {
 			return
 		}
 	}
-	// 先判断是否为o1模型
-	if config.O1Models.Contains(model) {
-		limiter := GetVisitor(token+"|o1model", config.O1LIMIT, config.O1PER)
-		// 获取剩余次数
-		remain := limiter.TokensAt(time.Now())
-		g.Log().Debug(ctx, "remain", remain)
-		if remain < 1 {
-			r.Response.Status = 429
-			// resMsg := gjson.New(MsgO1429)
-			// 根据remain计算需要等待的时间
-			// 生产间隔
-			creatInterval := config.O1PER / time.Duration(config.O1LIMIT)
-			// 转换为秒
-			creatIntervalSec := float64(creatInterval.Seconds())
-			// 等待时间
-			wait := (1 - remain) * creatIntervalSec
-			g.Log().Debug(ctx, "wait", wait, "creatIntervalSec", creatIntervalSec)
-			// resMsg.Set("detail.clears_in", int(wait))
-			// r.Response.WriteJson(resMsg)
-			r.Response.WriteJson(g.Map{
-				// "detail:":"您已经触发使用频率限制,当前限制为 "+ gconv.String(config.O1LIMIT) + " 次/"+ gconv.String(config.O1PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
-				"detail": "You have triggered the usage frequency limit, the current limit is " + gconv.String(config.O1LIMIT) + " times/" + gconv.String(config.O1PER) + ", please wait " + gconv.String(int(wait)) + " seconds before trying again.\n" + "您已经触发使用频率限制,当前限制为 " + gconv.String(config.O1LIMIT) + " 次/" + gconv.String(config.O1PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
-			})
-			return
-		} else {
-			// 消耗一个令牌
-			limiter.Allow()
-			r.Response.Status = 200
-			return
-		}
+	limit, per, limiter, err := GetVisitorWithModel(ctx, token, model)
+	if err != nil {
+		g.Log().Error(ctx, "GetVisitorWithModel", err)
+		r.Response.Status = 500
+		r.Response.WriteJson(g.Map{
+			"detail": err.Error(),
+		})
+		return
 	}
-
-	// 判断模型是否为plus模型 如果是则使用plus模型的限制
-	// if gstr.HasPrefix(model, "gpt-4")&&model!="gpt-4o-mini"
-	if config.PlusModels.Contains(model) {
-		limiter := GetVisitor(token, config.LIMIT, config.PER)
-		// 获取剩余次数
-		remain := limiter.TokensAt(time.Now())
-		g.Log().Debug(ctx, "remain", remain)
-		if remain < 1 {
-			r.Response.Status = 429
-			// resMsg := gjson.New(MsgPlus429)
-			// 根据remain计算需要等待的时间
-			// 生产间隔
-			creatInterval := config.PER / time.Duration(config.LIMIT)
-			// 转换为秒
-			creatIntervalSec := float64(creatInterval.Seconds())
-			// 等待时间
-			wait := (1 - remain) * creatIntervalSec
-			g.Log().Debug(ctx, "wait", wait, "creatIntervalSec", creatIntervalSec)
-			// resMsg.Set("detail.clears_in", int(wait))
-			// r.Response.WriteJson(resMsg)
-			r.Response.WriteJson(g.Map{
-				// "detail:":"您已经触发使用频率限制,当前限制为 "+ gconv.String(config.LIMIT) + " 次/"+ gconv.String(config.PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
-				"detail": "You have triggered the usage frequency limit, the current limit is " + gconv.String(config.LIMIT) + " times/" + gconv.String(config.PER) + ", please wait " + gconv.String(int(wait)) + " seconds before trying again.\n" + "您已经触发使用频率限制,当前限制为 " + gconv.String(config.LIMIT) + " 次/" + gconv.String(config.PER) + ",请等待 " + gconv.String(int(wait)) + " 秒后再试.",
-			})
-			return
-		} else {
-			// 消耗一个令牌
-			limiter.Allow()
-			r.Response.Status = 200
-			return
-		}
-
+	// 获取剩余次数
+	remain := limiter.TokensAt(time.Now())
+	g.Log().Debug(ctx, token, model, "remain", remain, "limit", limit, "per", per)
+	if remain < 1 {
+		r.Response.Status = 429
+		delayFrom := limiter.Reserve().DelayFrom(time.Now())
+		g.Log().Debug(ctx, "delayFrom", delayFrom)
+		r.Response.WriteJson(g.Map{
+			"detail": "You have triggered the usage frequency limit of " + model + ", the current limit is " + gconv.String(limit) + " times/" + gconv.String(per) + ", please wait " + gconv.String(int(delayFrom.Seconds())) + " seconds before trying again.\n" + "您已经触发 " + model + " 使用频率限制,当前限制为 " + gconv.String(limit) + " 次/" + gconv.String(per) + ",请等待 " + gconv.String(int(delayFrom.Seconds())) + " 秒后再试.",
+		})
+		return
 	}
+	// 消耗一个令牌
+	limiter.Allow()
 
 	r.Response.Status = 200
 

diff --git a/api/limit.go b/api/limit.go
@@ -1,52 +1,74 @@
 package api
 
 import (
-	"auditlimit/config"
+	"fmt"
+	"strings"
 	"sync"
 	"time"
 
+	"github.com/gogf/gf/v2/frame/g"
+	"github.com/gogf/gf/v2/text/gstr"
+	"github.com/gogf/gf/v2/util/gconv"
 	"golang.org/x/time/rate"
 )
 
+// visitor is the per-key limiter state stored in the visitors map.
 type visitor struct {
 	limiter  *rate.Limiter
 	lastSeen time.Time
+	Per      time.Duration // period the limiter was created with; CleanupVisitors uses it as the idle timeout
 }
 
 var visitors = make(map[string]*visitor)
 var mtx sync.Mutex
 
-func GetVisitor(token string, limit int, per time.Duration) *rate.Limiter {
+// GetVisitor returns the rate limiter stored under key, creating it on first use.
+//
+// A newly created limiter has a burst of limit and refills one token every
+// per/limit. When an entry already exists it is returned unchanged: limit and
+// per are ignored and only the entry's lastSeen timestamp is refreshed.
+//
+// NOTE(review): limit must be greater than zero; limit == 0 makes
+// per/time.Duration(limit) an integer division by zero.
+func GetVisitor(key string, limit int, per time.Duration) *rate.Limiter {
 	mtx.Lock()
 	defer mtx.Unlock()
 
-	v, exists := visitors[token]
+	v, exists := visitors[key]
 	if !exists {
 		limiter := rate.NewLimiter(rate.Every(per/time.Duration(limit)), limit)
-		visitors[token] = &visitor{limiter, time.Now()}
+		visitors[key] = &visitor{limiter, time.Now(), per}
 		return limiter
 	}
 
 	v.lastSeen = time.Now()
 	return v.limiter
 }
 
+// GetVisitorWithModel resolves the rate limit configured for model and returns
+// the limiter shared by all requests that carry the same token and model.
+//
+// The limit is read through g.Cfg().MustGetWithEnv using the upper-cased model
+// name as the key, with "40/3h" passed as the default. The value has the form
+// "count/duration" (for example "60/3h"), where duration is anything accepted
+// by time.ParseDuration. A value that does not split into exactly two parts is
+// replaced by "40/3h".
+//
+// It returns the parsed count, the parsed period and the limiter keyed by
+// token+"|"+MODEL. An error is returned, with zero values for the other
+// results, when the count or the period is not positive or when the duration
+// cannot be parsed.
+func GetVisitorWithModel(ctx g.Ctx, token, model string) (limit int, per time.Duration, limiter *rate.Limiter, err error) {
+	model = gstr.ToUpper(model)
+	modelrate := g.Cfg().MustGetWithEnv(ctx, model, "40/3h").String()
+	modelratearr := strings.Split(modelrate, "/")
+	if len(modelratearr) != 2 {
+		modelratearr = []string{"40", "3h"}
+	}
+	limit = gconv.Int(strings.TrimSpace(modelratearr[0]))
+	// A zero count would divide by zero inside GetVisitor and a negative one
+	// yields a meaningless limiter, so reject both before building anything.
+	if limit <= 0 {
+		return 0, 0, nil, fmt.Errorf("rate limit for model %q: count must be positive, got %q", model, modelratearr[0])
+	}
+	per, err = time.ParseDuration(strings.TrimSpace(modelratearr[1]))
+	if err != nil {
+		return 0, 0, nil, fmt.Errorf("rate limit for model %q: %w", model, err)
+	}
+	// rate.Every treats a non-positive interval as "unlimited"; refuse it
+	// instead of silently switching throttling off.
+	if per <= 0 {
+		return 0, 0, nil, fmt.Errorf("rate limit for model %q: period must be positive, got %v", model, per)
+	}
+	return limit, per, GetVisitor(token+"|"+model, limit, per), nil
+}
+
+// CleanupVisitors removes every entry of the visitors map whose lastSeen is
+// older than that entry's own Per. It holds mtx for the whole sweep.
 func CleanupVisitors() {
 	mtx.Lock()
 	defer mtx.Unlock()
 
 	for token, v := range visitors {
-		if time.Since(v.lastSeen) > config.PER {
+		if time.Since(v.lastSeen) > v.Per {
 			delete(visitors, token)
 		}
 	}
 }
 
 func init() {
-	// 每小时清理一次
+	// 每星期清理一次
 	go func() {
 		for {
-			time.Sleep(1 * time.Hour)
+			time.Sleep(time.Hour * 24 * 7)
 			CleanupVisitors()
 		}
 	}()

diff --git a/api/limit_test.go b/api/limit_test.go
@@ -0,0 +1,21 @@
+package api_test
+
+import (
+	"auditlimit/api"
+	"testing"
+
+	"github.com/gogf/gf/v2/frame/g"
+	"github.com/gogf/gf/v2/os/gctx"
+)
+
+// TestGetVisitorWithModel checks that a model name resolves to a usable
+// limiter and that the lookup is keyed by token and case-insensitive model.
+// The exact count and period depend on the configuration in effect, so only
+// their invariants are asserted.
+func TestGetVisitorWithModel(t *testing.T) {
+	ctx := gctx.New()
+	limit, per, limiter, err := api.GetVisitorWithModel(ctx, "token", "text-davinci-002-render-sha")
+	if err != nil {
+		t.Fatalf("GetVisitorWithModel: %v", err)
+	}
+	g.Log().Info(ctx, "limit:", limit, "per:", per)
+	if limit <= 0 {
+		t.Errorf("limit = %d, want > 0", limit)
+	}
+	if per <= 0 {
+		t.Errorf("per = %v, want > 0", per)
+	}
+	if limiter == nil {
+		t.Fatal("limiter is nil")
+	}
+	if got := limiter.Burst(); got != limit {
+		t.Errorf("limiter.Burst() = %d, want %d", got, limit)
+	}
+
+	// The model name is upper-cased before it becomes part of the key, so a
+	// second lookup in a different case must hand back the same limiter.
+	_, _, again, err := api.GetVisitorWithModel(ctx, "token", "TEXT-DAVINCI-002-RENDER-SHA")
+	if err != nil {
+		t.Fatalf("GetVisitorWithModel (second call): %v", err)
+	}
+	if again != limiter {
+		t.Error("same token and model returned a different limiter")
+	}
+}
diff --git a/config/config.go b/config/config.go
@@ -1,26 +1,23 @@
 package config
 
 import (
-	"time"
-
-	"github.com/gogf/gf/v2/container/garray"
 	"github.com/gogf/gf/v2/frame/g"
 	"github.com/gogf/gf/v2/os/gctx"
 )
 
 var (
-	PORT           = 8080
-	PlusModels     = garray.NewStrArrayFrom([]string{"gpt-4", "gpt-4o", "gpt-4-browsing", "gpt-4-plugins", "gpt-4-mobile", "gpt-4-code-interpreter", "gpt-4-dalle", "gpt-4-gizmo", "gpt-4-magic-create", "gpt-4o-canmore"})
-	O1Models       = garray.NewStrArrayFrom([]string{"o1-preview", "o1-mini"})
-	ForbiddenWords = []string{}         // 禁止词
-	LIMIT          = 40                 // 限制次数
-	PER            = time.Hour * 3      // 限制时间
-	O1LIMIT        = 5                  // 限制次数
-	O1PER          = time.Hour * 24 * 7 // 限制时间
-	OAIKEY         = ""                 // OAIKEY
-	OAIKEYLOG      = ""                 // OAIKEYLOG 隐藏
+	PORT = 8080
+	// PlusModels     = garray.NewStrArrayFrom([]string{"gpt-4", "gpt-4o", "gpt-4-browsing", "gpt-4-plugins", "gpt-4-mobile", "gpt-4-code-interpreter", "gpt-4-dalle", "gpt-4-gizmo", "gpt-4-magic-create", "gpt-4o-canmore"})
+	// O1Models       = garray.NewStrArrayFrom([]string{"o1-preview", "o1-mini"})
+	ForbiddenWords = []string{} // 禁止词
+	// LIMIT          = 40                 // 限制次数
+	// PER            = time.Hour * 3      // 限制时间
+	// O1LIMIT        = 5                  // 限制次数
+	// O1PER          = time.Hour * 24 * 7 // 限制时间
+	OAIKEY    = "" // OAIKEY
+	OAIKEYLOG = "" // OAIKEYLOG 隐藏
 	// MODERATION     = "https://api.openai.com/v1/moderations" // OPENAI Moderation 检测
-	MODERATION = "https://gateway.ai.cloudflare.com/v1/a8cace244ffbc233655fefeaca37d515/xyhelper/openai/moderations"
+	MODERATION = "https://gateway.ai.cloudflare.com/v1/040ac2002b4dd67637e97c628feb3484/xyhelper/openai/moderations"
 )
 
 func init() {
@@ -30,25 +27,25 @@ func init() {
 		PORT = port
 	}
 	g.Log().Info(ctx, "PORT:", PORT)
-	limit := g.Cfg().MustGetWithEnv(ctx, "LIMIT").Int()
-	if limit > 0 {
-		LIMIT = limit
-	}
-	g.Log().Info(ctx, "LIMIT:", LIMIT)
-	per := g.Cfg().MustGetWithEnv(ctx, "PER").Duration()
-	if per > 0 {
-		PER = per
-	}
-	g.Log().Info(ctx, "PER:", PER)
-	o1limit := g.Cfg().MustGetWithEnv(ctx, "O1LIMIT").Int()
-	if o1limit > 0 {
-		O1LIMIT = o1limit
-	}
-	g.Log().Info(ctx, "O1LIMIT:", O1LIMIT)
-	o1per := g.Cfg().MustGetWithEnv(ctx, "O1PER").Duration()
-	if o1per > 0 {
-		O1PER = o1per
-	}
+	// limit := g.Cfg().MustGetWithEnv(ctx, "LIMIT").Int()
+	// if limit > 0 {
+	// 	LIMIT = limit
+	// }
+	// g.Log().Info(ctx, "LIMIT:", LIMIT)
+	// per := g.Cfg().MustGetWithEnv(ctx, "PER").Duration()
+	// if per > 0 {
+	// 	PER = per
+	// }
+	// g.Log().Info(ctx, "PER:", PER)
+	// o1limit := g.Cfg().MustGetWithEnv(ctx, "O1LIMIT").Int()
+	// if o1limit > 0 {
+	// 	O1LIMIT = o1limit
+	// }
+	// g.Log().Info(ctx, "O1LIMIT:", O1LIMIT)
+	// o1per := g.Cfg().MustGetWithEnv(ctx, "O1PER").Duration()
+	// if o1per > 0 {
+	// 	O1PER = o1per
+	// }
 	oaikey := g.Cfg().MustGetWithEnv(ctx, "OAIKEY").String()
 	// oaikey 不为空
 	if oaikey != "" {

diff --git a/config/config.yaml b/config/config.yaml
@@ -1,5 +1,11 @@
 PORT: 9612 
-LIMIT: 2
-PER: "1h"
-OAIKEY: ""
-MODERATION: "https://api.openai.com/v1/moderations"
+OAIKEY: "" # OpenAI API key 用于内容审核
+MODERATION: "https://api.openai.com/v1/moderations"
+AUTO: "200/3h"
+TEXT-DAVINCI-002-RENDER-SHA: "200/3h"
+GPT-4O-MINI: "200/3h"
+GPT-4O: "60/3h"
+GPT-4: "20/3h"
+GPT-4O-CANMORE: "30/3h"
+O1-PREVIEW: "7/24h"
+O1-MINI: "50/24h"
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -8,7 +8,14 @@ services:
     volumes:
       - ./data:/app/data
     environment:
-      LIMIT: 40  # 限制每个userToken允许的次数
-      PER: "3h" # 限制周期 1s, 1m, 1h, 1d, 1w, 1y
-
+      PORT: 9611
+      OAIKEY: "" # OpenAI API key 用于内容审核
+      AUTO: "200/3h"
+      TEXT-DAVINCI-002-RENDER-SHA: "200/3h"
+      GPT-4O-MINI: "200/3h"
+      GPT-4O: "60/3h"
+      GPT-4: "20/3h"
+      GPT-4O-CANMORE: "30/3h"
+      O1-PREVIEW: "7/24h"
+      O1-MINI: "50/24h"