From 3eda7def89371c6caebfdf060da76bf65064415f Mon Sep 17 00:00:00 2001 From: rinfx Date: Wed, 26 Feb 2025 18:42:22 +0800 Subject: [PATCH] ai-search support quark (#1811) --- .../wasm-go/extensions/ai-search/README.md | 25 ++- .../wasm-go/extensions/ai-search/README_EN.md | 23 ++- .../extensions/ai-search/engine/bing/bing.go | 2 +- .../ai-search/engine/google/google.go | 2 +- .../ai-search/engine/quark/quark.go | 194 ++++++++++++++++++ plugins/wasm-go/extensions/ai-search/main.go | 11 +- 6 files changed, 250 insertions(+), 7 deletions(-) create mode 100644 plugins/wasm-go/extensions/ai-search/engine/quark/quark.go diff --git a/plugins/wasm-go/extensions/ai-search/README.md b/plugins/wasm-go/extensions/ai-search/README.md index c7472caa12..5ae133148a 100644 --- a/plugins/wasm-go/extensions/ai-search/README.md +++ b/plugins/wasm-go/extensions/ai-search/README.md @@ -48,10 +48,10 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics | 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | |------|----------|----------|--------|------| -| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch) | -| apiKey | string | 必填 | - | 搜索引擎API密钥 | +| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch/quark) | | serviceName | string | 必填 | - | 后端服务名称 | | servicePort | number | 必填 | - | 后端服务端口 | +| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey | | count | number | 选填 | 10 | 单次搜索返回结果数量 | | start | number | 选填 | 0 | 搜索结果偏移量(从第start+1条结果开始返回) | | timeoutMillisecond | number | 选填 | 5000 | API调用超时时间(毫秒) | @@ -78,6 +78,12 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics | linkField | string | 必填 | - | 结果链接字段名称 | | titleField | string | 必填 | - | 结果标题字段名称 | +## Quark 特定配置 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| secretKey | string | 必填 | - | Aliyun SecretKey | +| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 | ## 配置示例 @@ -94,6 +100,7 @@ searchFrom: count: 5 optionArgs: fileType: "pdf" +``` ### 
Arxiv搜索配置 @@ -106,6 +113,20 @@ searchFrom: count: 10 ``` + +### 夸克搜索配置 + +```yaml +searchFrom: +- type: quark + serviceName: "quark-svc.dns" + servicePort: 443 + apiKey: "aliyun accessKey" + count: 10 # 搜索网页数,最多10条 + secretKey: "aliyun secretKey" + endpoint: "iqs.cn-zhangjiakou.aliyuncs.com" +``` + ### 多搜索引擎配置 ```yaml diff --git a/plugins/wasm-go/extensions/ai-search/README_EN.md b/plugins/wasm-go/extensions/ai-search/README_EN.md index 8579537386..1afd955bd9 100644 --- a/plugins/wasm-go/extensions/ai-search/README_EN.md +++ b/plugins/wasm-go/extensions/ai-search/README_EN.md @@ -48,8 +48,8 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea | Name | Data Type | Requirement | Default Value | Description | |------|-----------|-------------|---------------|-------------| -| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch) | -| apiKey | string | Required | - | Search engine API key | +| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) | +| apiKey | string | Required | - | Search engine API key/Aliyun AccessKey | | serviceName | string | Required | - | Backend service name | | servicePort | number | Required | - | Backend service port | | count | number | Optional | 10 | Number of results returned per search | @@ -78,6 +78,12 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea | linkField | string | Required | - | Result link field name | | titleField | string | Required | - | Result title field name | +## Quark Specific Configuration + +| Name | Data Type | Requirement | Default Value | Description | +|------|----------|----------|--------|------| +| secretKey | string | Required | - | Aliyun SecretKey | +| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint used to reach the Quark search service | ## Configuration Examples @@ -107,6 +113,19 @@ searchFrom: count: 10 ``` +### Quark Search Configuration + +```yaml +searchFrom: +- type: quark + serviceName: 
"quark-svc.dns" + servicePort: 443 + apiKey: "aliyun accessKey" + count: 10 + secretKey: "aliyun secretKey" + endpoint: "iqs.cn-zhangjiakou.aliyuncs.com" +``` + ### Multiple Search Engines Configuration ```yaml diff --git a/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go index 71d39883ee..b24fe33464 100644 --- a/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go +++ b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go @@ -60,7 +60,7 @@ func NewBingSearch(config *gjson.Result) (*BingSearch, error) { } func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool { - return ctx.EngineType == "internet" + return ctx.EngineType == "" || ctx.EngineType == "internet" } func (b BingSearch) Client() wrapper.HttpClient { diff --git a/plugins/wasm-go/extensions/ai-search/engine/google/google.go b/plugins/wasm-go/extensions/ai-search/engine/google/google.go index c13cd0c9d2..e189646b99 100644 --- a/plugins/wasm-go/extensions/ai-search/engine/google/google.go +++ b/plugins/wasm-go/extensions/ai-search/engine/google/google.go @@ -68,7 +68,7 @@ func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) { } func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool { - return ctx.EngineType == "internet" + return ctx.EngineType == "" || ctx.EngineType == "internet" } func (g GoogleSearch) Client() wrapper.HttpClient { diff --git a/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go new file mode 100644 index 0000000000..84273bb776 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go @@ -0,0 +1,194 @@ +package quark + +import ( + "crypto/hmac" + "crypto/rand" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "net/http" + "net/url" + "sort" + "strings" + "time" + + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + + 
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine" +) + +type QuarkSearch struct { + apiKey string + secretKey string + timeoutMillisecond uint32 + client wrapper.HttpClient + count uint32 + endpoint string +} + +const ( + Path = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch" + ContentSha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // for empty body + Action = "GenericSearch" + Version = "2024-11-11" + SignatureAlgorithm = "ACS3-HMAC-SHA256" + SignedHeaders = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version" +) + +func urlEncoding(rawStr string) string { + encodedStr := url.PathEscape(rawStr) + encodedStr = strings.ReplaceAll(encodedStr, "+", "%2B") + encodedStr = strings.ReplaceAll(encodedStr, ":", "%3A") + encodedStr = strings.ReplaceAll(encodedStr, "=", "%3D") + encodedStr = strings.ReplaceAll(encodedStr, "&", "%26") + encodedStr = strings.ReplaceAll(encodedStr, "$", "%24") + encodedStr = strings.ReplaceAll(encodedStr, "@", "%40") + // encodedStr := url.QueryEscape(rawStr) + return encodedStr +} + +func getSignature(stringToSign, secret string) string { + h := hmac.New(sha256.New, []byte(secret)) + h.Write([]byte(stringToSign)) + hash := h.Sum(nil) + return hex.EncodeToString(hash) +} + +func getCanonicalHeaders(params map[string]string) string { + paramArray := []string{} + for k, v := range params { + paramArray = append(paramArray, k+":"+v) + } + sort.Slice(paramArray, func(i, j int) bool { + return paramArray[i] <= paramArray[j] + }) + return strings.Join(paramArray, "\n") + "\n" +} + +func getHasedString(input string) string { + hash := sha256.Sum256([]byte(input)) + hashHex := hex.EncodeToString(hash[:]) + return hashHex +} + +func generateHexID(length int) (string, error) { + bytes := make([]byte, length/2) + if _, err := rand.Read(bytes); err != nil { + return "", err + } + return hex.EncodeToString(bytes), nil +} + +func 
NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) {
+	// Builds a QuarkSearch from one searchFrom entry. apiKey, secretKey,
+	// serviceName and servicePort are required; endpoint, count and
+	// timeoutMillisecond fall back to defaults when missing or zero.
+	// The local is named q so the imported engine package is not shadowed.
+	q := &QuarkSearch{}
+	q.apiKey = config.Get("apiKey").String()
+	if q.apiKey == "" {
+		return nil, errors.New("apiKey not found")
+	}
+	q.secretKey = config.Get("secretKey").String()
+	if q.secretKey == "" {
+		return nil, errors.New("secretKey not found")
+	}
+	serviceName := config.Get("serviceName").String()
+	if serviceName == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	servicePort := config.Get("servicePort").Int()
+	if servicePort == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+	q.endpoint = config.Get("endpoint").String()
+	if q.endpoint == "" {
+		q.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com"
+	}
+	q.count = uint32(config.Get("count").Int())
+	if q.count == 0 {
+		q.count = 10
+	}
+	q.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+		FQDN: serviceName,
+		Port: servicePort,
+	})
+	q.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+	if q.timeoutMillisecond == 0 {
+		q.timeoutMillisecond = 5000
+	}
+	return q, nil
+}
+
+// NeedExectue reports whether this engine should run for ctx: it serves
+// requests whose engine type is empty or "internet".
+func (q QuarkSearch) NeedExectue(ctx engine.SearchContext) bool {
+	return ctx.EngineType == "" || ctx.EngineType == "internet"
+}
+
+// Client returns the HTTP client bound to the configured backend service.
+func (q QuarkSearch) Client() wrapper.HttpClient {
+	return q.client
+}
+
+// CallArgs builds the signed GET request for the queries in ctx. The queries
+// are joined with a space into one "query" parameter, and the request is
+// signed with the ACS3-HMAC-SHA256 scheme using apiKey/secretKey.
+func (q QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	queryParams := map[string]string{
+		"query":     strings.Join(ctx.Querys, " "),
+		"timeRange": "NoLimit",
+	}
+	// Map iteration order is random. The canonical query string has to list
+	// the parameters in ascending name order, otherwise the signature is
+	// computed over a different string on some calls and the request is
+	// rejected intermittently.
+	names := make([]string, 0, len(queryParams))
+	for name := range queryParams {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	pairs := make([]string, 0, len(names))
+	reqParams := url.Values{}
+	for _, name := range names {
+		pairs = append(pairs, name+"="+urlEncoding(queryParams[name]))
+		reqParams.Add(name, queryParams[name])
+	}
+	canonicalQueryString := strings.Join(pairs, "&")
+
+	timeStamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
+	randomID, err := generateHexID(32)
+	if err != nil {
+		// CallArgs cannot report an error; rather than send an empty nonce,
+		// fall back to a time-derived one of the same length.
+		randomID = fmt.Sprintf("%032x", time.Now().UnixNano())
+	}
+	signedValues := map[string]string{
+		"host":                  q.endpoint,
+		"x-acs-action":          Action,
+		"x-acs-content-sha256":  ContentSha256,
+		"x-acs-date":            timeStamp,
+		"x-acs-signature-nonce": randomID,
+		"x-acs-version":         Version,
+	}
+	canonicalRequest := http.MethodGet + "\n" + Path + "\n" + canonicalQueryString + "\n" + getCanonicalHeaders(signedValues) + "\n" + SignedHeaders + "\n" + ContentSha256
+	stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest)
+	authHeader := fmt.Sprintf("%s Credential=%s,SignedHeaders=%s,Signature=%s",
+		SignatureAlgorithm, q.apiKey, SignedHeaders, getSignature(stringToSign, q.secretKey))
+
+	return engine.CallArgs{
+		Method: http.MethodGet,
+		Url:    fmt.Sprintf("https://%s%s?%s", q.endpoint, Path, reqParams.Encode()),
+		Headers: [][2]string{
+			{"x-acs-date", timeStamp},
+			{"x-acs-signature-nonce", randomID},
+			{"x-acs-content-sha256", ContentSha256},
+			{"x-acs-version", Version},
+			{"x-acs-action", Action},
+			{"Authorization", authHeader},
+		},
+		Body:               nil,
+		TimeoutMillisecond: q.timeoutMillisecond,
+	}
+}
+
+// ParseResult extracts title/link/mainText from the "pageItems" array of the
+// response. Only the first count items are considered, and items that fail
+// Valid() are dropped.
+func (q QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	for index, item := range gjson.ParseBytes(response).Get("pageItems").Array() {
+		if index >= int(q.count) {
+			break // later items can never be kept, so stop parsing
+		}
+		result := engine.SearchResult{
+			Title:   item.Get("title").String(),
+			Link:    item.Get("link").String(),
+			Content: item.Get("mainText").String(),
+		}
+		if result.Valid() {
+			results = append(results, result)
+		}
+	}
+	return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/main.go b/plugins/wasm-go/extensions/ai-search/main.go
index 842094d8dc..720e688ccc 100644
--- a/plugins/wasm-go/extensions/ai-search/main.go
+++ b/plugins/wasm-go/extensions/ai-search/main.go
@@ -34,6 +34,7 @@ import (
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing"
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch"
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark"
 )
 
 type SearchRewrite struct {
@@ -166,6 +167,13 @@ func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
 			}
 			config.engine = append(config.engine, searchEngine)
 			privateExists = true
+		case "quark":
+			searchEngine, err := quark.NewQuarkSearch(&e)
+			if err != nil {
+				return fmt.Errorf("quark search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
 		default:
 			return fmt.Errorf("unkown search engine:%s", e.Get("type").String())
 		}
@@ -541,7 +549,8 @@ func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, referenc
 	if len(messages) > 1 {
 		firstMessage := messages[0]
 		log.Debugf("first message: %s", firstMessage)
-		firstMessage = strings.TrimPrefix(firstMessage, "data: ")
+		firstMessage = strings.TrimPrefix(firstMessage, "data:")
+		firstMessage = strings.TrimPrefix(firstMessage, " ")
 		firstMessage = strings.TrimSuffix(firstMessage, "\n")
 		deltaContent := gjson.Get(firstMessage, "choices.0.delta.content")
 		modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent))