Skip to content

Commit

Permalink
ai-search support quark (#1811)
Browse files Browse the repository at this point in the history
  • Loading branch information
rinfx authored Feb 26, 2025
1 parent 1787553 commit 3eda7de
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 7 deletions.
25 changes: 23 additions & 2 deletions plugins/wasm-go/extensions/ai-search/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics

| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch) |
| apiKey | string | 必填 | - | 搜索引擎API密钥 |
| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch/quark) |
| serviceName | string | 必填 | - | 后端服务名称 |
| servicePort | number | 必填 | - | 后端服务端口 |
| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey |
| count | number | 选填 | 10 | 单次搜索返回结果数量 |
| start | number | 选填 | 0 | 搜索结果偏移量(从第start+1条结果开始返回) |
| timeoutMillisecond | number | 选填 | 5000 | API调用超时时间(毫秒) |
Expand All @@ -78,6 +78,12 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics
| linkField | string | 必填 | - | 结果链接字段名称 |
| titleField | string | 必填 | - | 结果标题字段名称 |

## Quark 特定配置

| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| secretKey | string | 必填 | - | Aliyun SecretKey |
| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 |

## 配置示例

Expand All @@ -94,6 +100,7 @@ searchFrom:
count: 5
optionArgs:
fileType: "pdf"
```
### Arxiv搜索配置
Expand All @@ -106,6 +113,20 @@ searchFrom:
count: 10
```
### 夸克搜索配置
```yaml
searchFrom:
- type: quark
serviceName: "quark-svc.dns"
servicePort: 443
apiKey: "aliyun accessKey"
count: 10 # 搜索网页数,最多10条
secretKey: "aliyun secretKey"
endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
```
### 多搜索引擎配置
```yaml
Expand Down
23 changes: 21 additions & 2 deletions plugins/wasm-go/extensions/ai-search/README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea

| Name | Data Type | Requirement | Default Value | Description |
|------|-----------|-------------|---------------|-------------|
| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch) |
| apiKey | string | Required | - | Search engine API key |
| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) |
| apiKey | string | Required | - | Search engine API key/Aliyun AccessKey |
| serviceName | string | Required | - | Backend service name |
| servicePort | number | Required | - | Backend service port |
| count | number | Optional | 10 | Number of results returned per search |
Expand Down Expand Up @@ -78,6 +78,12 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea
| linkField | string | Required | - | Result link field name |
| titleField | string | Required | - | Result title field name |

## Quark Specific Configuration

| Name | Data Type | Requirement | Default Value | Description |
|------|----------|----------|--------|------|
| secretKey | string | Required | - | Aliyun SecretKey |
| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint used when calling the Quark search service |

## Configuration Examples

Expand Down Expand Up @@ -107,6 +113,19 @@ searchFrom:
count: 10
```
### Quark Search Configuration
```yaml
searchFrom:
- type: quark
serviceName: "quark-svc.dns"
servicePort: 443
apiKey: "aliyun accessKey"
count: 10
secretKey: "aliyun secretKey"
endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
```
### Multiple Search Engines Configuration
```yaml
Expand Down
2 changes: 1 addition & 1 deletion plugins/wasm-go/extensions/ai-search/engine/bing/bing.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func NewBingSearch(config *gjson.Result) (*BingSearch, error) {
}

func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool {
return ctx.EngineType == "internet"
return ctx.EngineType == "" || ctx.EngineType == "internet"
}

func (b BingSearch) Client() wrapper.HttpClient {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) {
}

func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool {
return ctx.EngineType == "internet"
return ctx.EngineType == "" || ctx.EngineType == "internet"
}

func (g GoogleSearch) Client() wrapper.HttpClient {
Expand Down
194 changes: 194 additions & 0 deletions plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package quark

import (
"crypto/hmac"
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"net/http"
"net/url"
"sort"
"strings"
"time"

"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"

"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)

// QuarkSearch is the search engine implementation backed by the Quark
// (Aliyun) generic search API. Instances are built by NewQuarkSearch.
type QuarkSearch struct {
// apiKey is sent as the Credential part of the Authorization header.
apiKey string
// secretKey is the HMAC-SHA256 key used to sign each request.
secretKey string
// timeoutMillisecond is the per-call timeout handed back in CallArgs.
timeoutMillisecond uint32
// client is the cluster HTTP client returned by Client().
client wrapper.HttpClient
// count caps the number of results kept by ParseResult.
count uint32
// endpoint is the API host; it is both signed as the "host" header and
// used as the host of the request URL.
endpoint string
}

// Fixed request attributes used by CallArgs when building and signing the
// search request.
const (
// Path is the request path; it is also the canonical URI in the signature.
Path = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch"
// ContentSha256 is the payload hash sent in x-acs-content-sha256.
ContentSha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // for empty body
// Action is the value of the x-acs-action header.
Action = "GenericSearch"
// Version is the value of the x-acs-version header.
Version = "2024-11-11"
// SignatureAlgorithm prefixes both the string-to-sign and the Authorization header.
SignatureAlgorithm = "ACS3-HMAC-SHA256"
// SignedHeaders lists the header names covered by the signature; it is
// written into the canonical request and the Authorization header.
SignedHeaders = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version"
)

// urlEncoding percent-encodes rawStr for use in the canonical query string.
// It starts from url.PathEscape (which encodes a space as %20) and then
// additionally encodes the characters "+", ":", "=", "&", "$" and "@" that
// PathEscape leaves untouched.
func urlEncoding(rawStr string) string {
	// None of the replacement strings contains a character that is itself a
	// replacement target, so a single pass is equivalent to sequential
	// replacement.
	extraEscapes := strings.NewReplacer(
		"+", "%2B",
		":", "%3A",
		"=", "%3D",
		"&", "%26",
		"$", "%24",
		"@", "%40",
	)
	return extraEscapes.Replace(url.PathEscape(rawStr))
}

// getSignature returns the lowercase hex HMAC-SHA256 of stringToSign keyed
// with secret.
func getSignature(stringToSign, secret string) string {
	mac := hmac.New(sha256.New, []byte(secret))
	mac.Write([]byte(stringToSign))
	return hex.EncodeToString(mac.Sum(nil))
}

// getCanonicalHeaders renders params as "name:value" lines, one per header,
// ordered by header name in ascending order, with a trailing newline.
//
// Ordering is done on the header names alone. Sorting the joined
// "name:value" strings would let the ':' separator take part in the
// comparison and misplace a name that is a prefix of another (for example
// "a-b" would sort before "a").
func getCanonicalHeaders(params map[string]string) string {
	names := make([]string, 0, len(params))
	for name := range params {
		names = append(names, name)
	}
	sort.Strings(names)

	lines := make([]string, 0, len(names))
	for _, name := range names {
		lines = append(lines, name+":"+params[name])
	}
	return strings.Join(lines, "\n") + "\n"
}

// getHasedString returns the lowercase hex SHA-256 digest of input.
func getHasedString(input string) string {
	hasher := sha256.New()
	hasher.Write([]byte(input))
	return hex.EncodeToString(hasher.Sum(nil))
}

// generateHexID returns a random lowercase hex string of exactly length
// characters, read from crypto/rand.
//
// Odd lengths are supported by generating one extra nibble and truncating.
// A negative length yields an error instead of a panic in make.
func generateHexID(length int) (string, error) {
	if length < 0 {
		return "", fmt.Errorf("invalid hex id length %d", length)
	}
	// Round up so that an odd length still has enough random bytes.
	buf := make([]byte, (length+1)/2)
	if _, err := rand.Read(buf); err != nil {
		return "", err
	}
	return hex.EncodeToString(buf)[:length], nil
}

// NewQuarkSearch builds a QuarkSearch from one search engine config entry.
//
// Required keys: apiKey, secretKey, serviceName, servicePort. An error is
// returned when any of them is missing or empty/zero.
// Optional keys and their defaults: endpoint
// ("iqs.cn-zhangjiakou.aliyuncs.com"), count (10), timeoutMillisecond (5000).
func NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) {
	// The local is deliberately not named "engine" so it does not shadow the
	// imported engine package inside this function.
	search := &QuarkSearch{}

	search.apiKey = config.Get("apiKey").String()
	if search.apiKey == "" {
		return nil, errors.New("apiKey not found")
	}
	search.secretKey = config.Get("secretKey").String()
	if search.secretKey == "" {
		return nil, errors.New("secretKey not found")
	}
	serviceName := config.Get("serviceName").String()
	if serviceName == "" {
		return nil, errors.New("serviceName not found")
	}
	servicePort := config.Get("servicePort").Int()
	if servicePort == 0 {
		return nil, errors.New("servicePort not found")
	}

	search.endpoint = config.Get("endpoint").String()
	if search.endpoint == "" {
		search.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com"
	}

	// Validate before narrowing: converting a negative value straight to
	// uint32 would wrap around to a huge limit instead of using the default.
	count := config.Get("count").Int()
	if count <= 0 {
		count = 10
	}
	search.count = uint32(count)

	search.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
		FQDN: serviceName,
		Port: servicePort,
	})

	search.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
	if search.timeoutMillisecond == 0 {
		search.timeoutMillisecond = 5000
	}
	return search, nil
}

// NeedExectue reports whether this engine should run for ctx: it does when
// the requested engine type is unset or is "internet".
func (g QuarkSearch) NeedExectue(ctx engine.SearchContext) bool {
	switch ctx.EngineType {
	case "", "internet":
		return true
	default:
		return false
	}
}

// Client returns the HTTP client that NewQuarkSearch created for the
// configured backend service.
func (g QuarkSearch) Client() wrapper.HttpClient {
return g.client
}

// CallArgs builds the signed GET request for a Quark generic search.
//
// All queries in ctx.Querys are joined with a single space into one "query"
// parameter. The request is signed with SignatureAlgorithm: a canonical
// request (method, path, query string, headers, signed header list, payload
// hash) is hashed, the hash is HMAC-signed with the secret key, and the
// result is sent in the Authorization header.
func (g QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
	queryParams := map[string]string{
		"query":     strings.Join(ctx.Querys, " "),
		"timeRange": "NoLimit",
	}

	// Map iteration order is random, so the parameters are emitted in sorted
	// key order; otherwise the signed string would change between calls.
	paramNames := make([]string, 0, len(queryParams))
	for name := range queryParams {
		paramNames = append(paramNames, name)
	}
	sort.Strings(paramNames)
	pairs := make([]string, 0, len(paramNames))
	for _, name := range paramNames {
		pairs = append(pairs, urlEncoding(name)+"="+urlEncoding(queryParams[name]))
	}
	canonicalQueryString := strings.Join(pairs, "&")

	now := time.Now().UTC()
	timeStamp := now.Format("2006-01-02T15:04:05Z")

	nonce, err := generateHexID(32)
	if err != nil {
		// This method has no error return; fall back to a time-derived
		// nonce rather than signing and sending an empty one.
		nonce = fmt.Sprintf("%032x", now.UnixNano())
	}

	signedHeaderValues := map[string]string{
		"host":                  g.endpoint,
		"x-acs-action":          Action,
		"x-acs-content-sha256":  ContentSha256,
		"x-acs-date":            timeStamp,
		"x-acs-signature-nonce": nonce,
		"x-acs-version":         Version,
	}
	canonicalRequest := strings.Join([]string{
		http.MethodGet,
		Path,
		canonicalQueryString,
		getCanonicalHeaders(signedHeaderValues),
		SignedHeaders,
		ContentSha256,
	}, "\n")
	stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest)
	signature := getSignature(stringToSign, g.secretKey)
	authHeader := fmt.Sprintf("%s Credential=%s,SignedHeaders=%s,Signature=%s",
		SignatureAlgorithm, g.apiKey, SignedHeaders, signature)

	// The URL reuses the exact query string that was signed, so the request
	// on the wire and the signature use one and the same encoding.
	requestURL := fmt.Sprintf("https://%s%s?%s", g.endpoint, Path, canonicalQueryString)

	return engine.CallArgs{
		Method: http.MethodGet,
		Url:    requestURL,
		Headers: [][2]string{
			{"x-acs-date", timeStamp},
			{"x-acs-signature-nonce", nonce},
			{"x-acs-content-sha256", ContentSha256},
			{"x-acs-version", Version},
			{"x-acs-action", Action},
			{"Authorization", authHeader},
		},
		Body:               nil,
		TimeoutMillisecond: g.timeoutMillisecond,
	}
}

// ParseResult extracts search results from the "pageItems" array of the JSON
// response, mapping title -> Title, link -> Link and mainText -> Content.
//
// Items for which Valid() is false are skipped, and at most g.count valid
// results are returned. The limit is applied to the number of results kept,
// so skipped items do not use up result slots, and parsing stops as soon as
// the limit is reached.
func (g QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
	var results []engine.SearchResult
	for _, item := range gjson.ParseBytes(response).Get("pageItems").Array() {
		// Compare as unsigned so a large count cannot turn negative when
		// converted to a 32-bit int.
		if uint32(len(results)) >= g.count {
			break
		}
		result := engine.SearchResult{
			Title:   item.Get("title").String(),
			Link:    item.Get("link").String(),
			Content: item.Get("mainText").String(),
		}
		if result.Valid() {
			results = append(results, result)
		}
	}
	return results
}
11 changes: 10 additions & 1 deletion plugins/wasm-go/extensions/ai-search/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark"
)

type SearchRewrite struct {
Expand Down Expand Up @@ -166,6 +167,13 @@ func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
}
config.engine = append(config.engine, searchEngine)
privateExists = true
case "quark":
searchEngine, err := quark.NewQuarkSearch(&e)
if err != nil {
return fmt.Errorf("elasticsearch search engine init failed:%s", err)
}
config.engine = append(config.engine, searchEngine)
internetExists = true
default:
return fmt.Errorf("unkown search engine:%s", e.Get("type").String())
}
Expand Down Expand Up @@ -541,7 +549,8 @@ func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, referenc
if len(messages) > 1 {
firstMessage := messages[0]
log.Debugf("first message: %s", firstMessage)
firstMessage = strings.TrimPrefix(firstMessage, "data: ")
firstMessage = strings.TrimPrefix(firstMessage, "data:")
firstMessage = strings.TrimPrefix(firstMessage, " ")
firstMessage = strings.TrimSuffix(firstMessage, "\n")
deltaContent := gjson.Get(firstMessage, "choices.0.delta.content")
modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent))
Expand Down

0 comments on commit 3eda7de

Please sign in to comment.